Home | History | Annotate | Line # | Download | only in metadata
metadata.c revision 1.1.1.3
      1 /*	$NetBSD: metadata.c,v 1.1.1.3 2009/12/02 00:26:39 haad Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
      5  * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
      6  *
      7  * This file is part of LVM2.
      8  *
      9  * This copyrighted material is made available to anyone wishing to use,
     10  * modify, copy, or redistribute it subject to the terms and conditions
     11  * of the GNU Lesser General Public License v.2.1.
     12  *
     13  * You should have received a copy of the GNU Lesser General Public License
     14  * along with this program; if not, write to the Free Software Foundation,
     15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     16  */
     17 
     18 #include "lib.h"
     19 #include "device.h"
     20 #include "metadata.h"
     21 #include "toolcontext.h"
     22 #include "lvm-string.h"
     23 #include "lvm-file.h"
     24 #include "lvmcache.h"
     25 #include "memlock.h"
     26 #include "str_list.h"
     27 #include "pv_alloc.h"
     28 #include "segtype.h"
     29 #include "activate.h"
     30 #include "display.h"
     31 #include "locking.h"
     32 #include "archiver.h"
     33 #include "defaults.h"
     34 #include "filter-persistent.h"
     35 
     36 #include <sys/param.h>
     37 
/*
 * pv_field - read one member of a physical volume through a generic handle.
 *
 * Casts 'handle' to (const struct physical_volume *) and evaluates to its
 * 'field' member.  The handle is dereferenced without any NULL check.
 *
 * FIXME: Check for valid handle before dereferencing field or log error?
 */
#define pv_field(handle, field)				\
	(((const struct physical_volume *)(handle))->field)
     43 
     44 static struct physical_volume *_pv_read(struct cmd_context *cmd,
     45 					struct dm_pool *pvmem,
     46 					const char *pv_name,
     47 					struct dm_list *mdas,
     48 					uint64_t *label_sector,
     49 					int warnings, int scan_label_only);
     50 
     51 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
     52 			 			const char *pv_name);
     53 
     54 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
     55 				      const char *pv_name);
     56 
     57 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
     58 						      const struct id *id);
     59 
     60 static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
     61 				    uint32_t status);
     62 
/*
 * Confirmation prompt format string.  It takes two "%s" arguments: the
 * physical volume name followed by the volume group name.
 */
const char _really_init[] =
    "Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
     65 
     66 unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
     67 {
     68 	if (pv->pe_align)
     69 		goto out;
     70 
     71 	if (data_alignment)
     72 		pv->pe_align = data_alignment;
     73 	else
     74 		pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
     75 
     76 	if (!pv->dev)
     77 		goto out;
     78 
     79 	/*
     80 	 * Align to stripe-width of underlying md device if present
     81 	 */
     82 	if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
     83 				  DEFAULT_MD_CHUNK_ALIGNMENT))
     84 		pv->pe_align = MAX(pv->pe_align,
     85 				   dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
     86 						       pv->dev));
     87 
     88 	/*
     89 	 * Align to topology's minimum_io_size or optimal_io_size if present
     90 	 * - minimum_io_size - the smallest request the device can perform
     91 	 *   w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
     92 	 * - optimal_io_size - the device's preferred unit of receiving I/O
     93 	 *   (e.g. MD's stripe width)
     94 	 */
     95 	if (find_config_tree_bool(pv->fmt->cmd,
     96 				  "devices/data_alignment_detection",
     97 				  DEFAULT_DATA_ALIGNMENT_DETECTION)) {
     98 		pv->pe_align = MAX(pv->pe_align,
     99 				   dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
    100 						       pv->dev));
    101 
    102 		pv->pe_align = MAX(pv->pe_align,
    103 				   dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
    104 						       pv->dev));
    105 	}
    106 
    107 	log_very_verbose("%s: Setting PE alignment to %lu sectors.",
    108 			 dev_name(pv->dev), pv->pe_align);
    109 
    110 out:
    111 	return pv->pe_align;
    112 }
    113 
    114 unsigned long set_pe_align_offset(struct physical_volume *pv,
    115 				  unsigned long data_alignment_offset)
    116 {
    117 	if (pv->pe_align_offset)
    118 		goto out;
    119 
    120 	if (data_alignment_offset)
    121 		pv->pe_align_offset = data_alignment_offset;
    122 
    123 	if (!pv->dev)
    124 		goto out;
    125 
    126 	if (find_config_tree_bool(pv->fmt->cmd,
    127 				  "devices/data_alignment_offset_detection",
    128 				  DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION))
    129 		pv->pe_align_offset =
    130 			MAX(pv->pe_align_offset,
    131 			    dev_alignment_offset(pv->fmt->cmd->sysfs_dir,
    132 						 pv->dev));
    133 
    134 	log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
    135 			 dev_name(pv->dev), pv->pe_align_offset);
    136 
    137 out:
    138 	return pv->pe_align_offset;
    139 }
    140 
    141 /**
    142  * add_pv_to_vg - Add a physical volume to a volume group
    143  * @vg - volume group to add to
    144  * @pv_name - name of the pv (to be removed)
    145  * @pv - physical volume to add to volume group
    146  *
    147  * Returns:
    148  *  0 - failure
    149  *  1 - success
    150  * FIXME: remove pv_name - obtain safely from pv
    151  */
    152 int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
    153 		 struct physical_volume *pv)
    154 {
    155 	struct pv_list *pvl;
    156 	struct format_instance *fid = vg->fid;
    157 	struct dm_pool *mem = vg->vgmem;
    158 
    159 	log_verbose("Adding physical volume '%s' to volume group '%s'",
    160 		    pv_name, vg->name);
    161 
    162 	if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
    163 		log_error("pv_list allocation for '%s' failed", pv_name);
    164 		return 0;
    165 	}
    166 
    167 	if (!is_orphan_vg(pv->vg_name)) {
    168 		log_error("Physical volume '%s' is already in volume group "
    169 			  "'%s'", pv_name, pv->vg_name);
    170 		return 0;
    171 	}
    172 
    173 	if (pv->fmt != fid->fmt) {
    174 		log_error("Physical volume %s is of different format type (%s)",
    175 			  pv_name, pv->fmt->name);
    176 		return 0;
    177 	}
    178 
    179 	/* Ensure PV doesn't depend on another PV already in the VG */
    180 	if (pv_uses_vg(pv, vg)) {
    181 		log_error("Physical volume %s might be constructed from same "
    182 			  "volume group %s", pv_name, vg->name);
    183 		return 0;
    184 	}
    185 
    186 	if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) {
    187 		log_error("vg->name allocation failed for '%s'", pv_name);
    188 		return 0;
    189 	}
    190 
    191 	memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
    192 
    193 	/* Units of 512-byte sectors */
    194 	pv->pe_size = vg->extent_size;
    195 
    196 	/*
    197 	 * pe_count must always be calculated by pv_setup
    198 	 */
    199 	pv->pe_alloc_count = 0;
    200 
    201 	if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
    202 				     vg->extent_size, 0, 0, 0UL, UINT64_C(0),
    203 				     &fid->metadata_areas, pv, vg)) {
    204 		log_error("Format-specific setup of physical volume '%s' "
    205 			  "failed.", pv_name);
    206 		return 0;
    207 	}
    208 
    209 	if (_find_pv_in_vg(vg, pv_name)) {
    210 		log_error("Physical volume '%s' listed more than once.",
    211 			  pv_name);
    212 		return 0;
    213 	}
    214 
    215 	if (vg->pv_count && (vg->pv_count == vg->max_pv)) {
    216 		log_error("No space for '%s' - volume group '%s' "
    217 			  "holds max %d physical volume(s).", pv_name,
    218 			  vg->name, vg->max_pv);
    219 		return 0;
    220 	}
    221 
    222 	if (!alloc_pv_segment_whole_pv(mem, pv))
    223 		return_0;
    224 
    225 	pvl->pv = pv;
    226 	dm_list_add(&vg->pvs, &pvl->list);
    227 
    228 	if ((uint64_t) vg->extent_count + pv->pe_count > UINT32_MAX) {
    229 		log_error("Unable to add %s to %s: new extent count (%"
    230 			  PRIu64 ") exceeds limit (%" PRIu32 ").",
    231 			  pv_name, vg->name,
    232 			  (uint64_t) vg->extent_count + pv->pe_count,
    233 			  UINT32_MAX);
    234 		return 0;
    235 	}
    236 
    237 	vg->pv_count++;
    238 	vg->extent_count += pv->pe_count;
    239 	vg->free_count += pv->pe_count;
    240 
    241 	return 1;
    242 }
    243 
    244 static int _copy_pv(struct dm_pool *pvmem,
    245 		    struct physical_volume *pv_to,
    246 		    struct physical_volume *pv_from)
    247 {
    248 	memcpy(pv_to, pv_from, sizeof(*pv_to));
    249 
    250 	if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name)))
    251 		return_0;
    252 
    253 	if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags))
    254 		return_0;
    255 
    256 	if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments))
    257 		return_0;
    258 
    259 	return 1;
    260 }
    261 
    262 static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from)
    263 {
    264 	struct pv_list *pvl_to = NULL;
    265 
    266 	if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to))))
    267 		return_NULL;
    268 
    269 	if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv))))
    270 		goto_bad;
    271 
    272 	if(!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv))
    273 		goto_bad;
    274 
    275 	return pvl_to;
    276 bad:
    277 	dm_pool_free(pvmem, pvl_to);
    278 	return NULL;
    279 }
    280 
    281 int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
    282 			 const char *vgid, const char *pvid,
    283 			 struct physical_volume *pv)
    284 {
    285 	struct volume_group *vg;
    286 	struct pv_list *pvl;
    287 	int r = 0, consistent = 0;
    288 
    289 	if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, &consistent))) {
    290 		log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s",
    291 			  vg_name);
    292 		return 0;
    293 	}
    294 
    295 	if (!consistent)
    296 		log_warn("WARNING: Volume group %s is not consistent",
    297 			 vg_name);
    298 
    299 	dm_list_iterate_items(pvl, &vg->pvs) {
    300 		if (id_equal(&pvl->pv->id, (const struct id *) pvid)) {
    301 			if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) {
    302 				log_error("internal PV duplication failed");
    303 				r = 0;
    304 				goto out;
    305 			}
    306 			r = 1;
    307 			goto out;
    308 		}
    309 	}
    310 out:
    311 	vg_release(vg);
    312 	return r;
    313 }
    314 
    315 int move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
    316 	    const char *pv_name)
    317 {
    318 	struct physical_volume *pv;
    319 	struct pv_list *pvl;
    320 
    321 	/* FIXME: handle tags */
    322 	if (!(pvl = find_pv_in_vg(vg_from, pv_name))) {
    323 		log_error("Physical volume %s not in volume group %s",
    324 			  pv_name, vg_from->name);
    325 		return 0;
    326 	}
    327 
    328 	if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
    329 	    _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
    330 		return 0;
    331 
    332 	dm_list_move(&vg_to->pvs, &pvl->list);
    333 
    334 	vg_from->pv_count--;
    335 	vg_to->pv_count++;
    336 
    337 	pv = pvl->pv;
    338 
    339 	vg_from->extent_count -= pv_pe_count(pv);
    340 	vg_to->extent_count += pv_pe_count(pv);
    341 
    342 	vg_from->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
    343 	vg_to->free_count += pv_pe_count(pv) - pv_pe_alloc_count(pv);
    344 
    345 	return 1;
    346 }
    347 
    348 int move_pvs_used_by_lv(struct volume_group *vg_from,
    349 			struct volume_group *vg_to,
    350 			const char *lv_name)
    351 {
    352 	struct lv_segment *lvseg;
    353 	unsigned s;
    354 	struct lv_list *lvl;
    355 	struct logical_volume *lv;
    356 
    357 	/* FIXME: handle tags */
    358 	if (!(lvl = find_lv_in_vg(vg_from, lv_name))) {
    359 		log_error("Logical volume %s not in volume group %s",
    360 			  lv_name, vg_from->name);
    361 		return 0;
    362 	}
    363 
    364 	if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
    365 	    _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
    366 		return 0;
    367 
    368 	dm_list_iterate_items(lvseg, &lvl->lv->segments) {
    369 		if (lvseg->log_lv)
    370 			if (!move_pvs_used_by_lv(vg_from, vg_to,
    371 						     lvseg->log_lv->name))
    372 				return_0;
    373 		for (s = 0; s < lvseg->area_count; s++) {
    374 			if (seg_type(lvseg, s) == AREA_PV) {
    375 				if (!move_pv(vg_from, vg_to,
    376 					      pv_dev_name(seg_pv(lvseg, s))))
    377 					return_0;
    378 			} else if (seg_type(lvseg, s) == AREA_LV) {
    379 				lv = seg_lv(lvseg, s);
    380 				if (!move_pvs_used_by_lv(vg_from, vg_to,
    381 							     lv->name))
    382 				    return_0;
    383 			}
    384 		}
    385 	}
    386 	return 1;
    387 }
    388 
    389 static int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name)
    390 {
    391 	char vg_path[PATH_MAX];
    392 
    393 	if (!validate_name(vg_name))
    394 		return_0;
    395 
    396 	snprintf(vg_path, PATH_MAX, "%s%s", cmd->dev_dir, vg_name);
    397 	if (path_exists(vg_path)) {
    398 		log_error("%s: already exists in filesystem", vg_path);
    399 		return 0;
    400 	}
    401 
    402 	return 1;
    403 }
    404 
    405 int validate_vg_rename_params(struct cmd_context *cmd,
    406 			      const char *vg_name_old,
    407 			      const char *vg_name_new)
    408 {
    409 	unsigned length;
    410 	char *dev_dir;
    411 
    412 	dev_dir = cmd->dev_dir;
    413 	length = strlen(dev_dir);
    414 
    415 	/* Check sanity of new name */
    416 	if (strlen(vg_name_new) > NAME_LEN - length - 2) {
    417 		log_error("New volume group path exceeds maximum length "
    418 			  "of %d!", NAME_LEN - length - 2);
    419 		return 0;
    420 	}
    421 
    422 	if (!validate_new_vg_name(cmd, vg_name_new)) {
    423 		log_error("New volume group name \"%s\" is invalid",
    424 			  vg_name_new);
    425 		return 0;
    426 	}
    427 
    428 	if (!strcmp(vg_name_old, vg_name_new)) {
    429 		log_error("Old and new volume group names must differ");
    430 		return 0;
    431 	}
    432 
    433 	return 1;
    434 }
    435 
    436 int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
    437 	      const char *new_name)
    438 {
    439 	struct dm_pool *mem = vg->vgmem;
    440 	struct pv_list *pvl;
    441 
    442 	if (!(vg->name = dm_pool_strdup(mem, new_name))) {
    443 		log_error("vg->name allocation failed for '%s'", new_name);
    444 		return 0;
    445 	}
    446 
    447 	dm_list_iterate_items(pvl, &vg->pvs) {
    448 		if (!(pvl->pv->vg_name = dm_pool_strdup(mem, new_name))) {
    449 			log_error("pv->vg_name allocation failed for '%s'",
    450 				  pv_dev_name(pvl->pv));
    451 			return 0;
    452 		}
    453 	}
    454 
    455 	return 1;
    456 }
    457 
    458 int remove_lvs_in_vg(struct cmd_context *cmd,
    459 		     struct volume_group *vg,
    460 		     force_t force)
    461 {
    462 	struct dm_list *lst;
    463 	struct lv_list *lvl;
    464 
    465 	while ((lst = dm_list_first(&vg->lvs))) {
    466 		lvl = dm_list_item(lst, struct lv_list);
    467 		if (!lv_remove_with_dependencies(cmd, lvl->lv, force))
    468 		    return 0;
    469 	}
    470 
    471 	return 1;
    472 }
    473 
    474 int vg_remove_check(struct volume_group *vg)
    475 {
    476 	unsigned lv_count;
    477 	struct pv_list *pvl, *tpvl;
    478 
    479 	if (vg_read_error(vg) || vg_missing_pv_count(vg)) {
    480 		log_error("Volume group \"%s\" not found, is inconsistent "
    481 			  "or has PVs missing.", vg ? vg->name : "");
    482 		log_error("Consider vgreduce --removemissing if metadata "
    483 			  "is inconsistent.");
    484 		return 0;
    485 	}
    486 
    487 	if (!vg_check_status(vg, EXPORTED_VG))
    488 		return 0;
    489 
    490 	lv_count = vg_visible_lvs(vg);
    491 
    492 	if (lv_count) {
    493 		log_error("Volume group \"%s\" still contains %u "
    494 			  "logical volume(s)", vg->name, lv_count);
    495 		return 0;
    496 	}
    497 
    498 	if (!archive(vg))
    499 		return 0;
    500 
    501 	dm_list_iterate_items_safe(pvl, tpvl, &vg->pvs) {
    502 		dm_list_del(&pvl->list);
    503 		dm_list_add(&vg->removed_pvs, &pvl->list);
    504 	}
    505 	return 1;
    506 }
    507 
    508 int vg_remove(struct volume_group *vg)
    509 {
    510 	struct physical_volume *pv;
    511 	struct pv_list *pvl;
    512 	int ret = 1;
    513 
    514 	if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE)) {
    515 		log_error("Can't get lock for orphan PVs");
    516 		return 0;
    517 	}
    518 
    519 	if (!vg_remove_mdas(vg)) {
    520 		log_error("vg_remove_mdas %s failed", vg->name);
    521 		unlock_vg(vg->cmd, VG_ORPHANS);
    522 		return 0;
    523 	}
    524 
    525 	/* init physical volumes */
    526 	dm_list_iterate_items(pvl, &vg->removed_pvs) {
    527 		pv = pvl->pv;
    528 		log_verbose("Removing physical volume \"%s\" from "
    529 			    "volume group \"%s\"", pv_dev_name(pv), vg->name);
    530 		pv->vg_name = vg->fid->fmt->orphan_vg_name;
    531 		pv->status = ALLOCATABLE_PV;
    532 
    533 		if (!dev_get_size(pv_dev(pv), &pv->size)) {
    534 			log_error("%s: Couldn't get size.", pv_dev_name(pv));
    535 			ret = 0;
    536 			continue;
    537 		}
    538 
    539 		/* FIXME Write to same sector label was read from */
    540 		if (!pv_write(vg->cmd, pv, NULL, INT64_C(-1))) {
    541 			log_error("Failed to remove physical volume \"%s\""
    542 				  " from volume group \"%s\"",
    543 				  pv_dev_name(pv), vg->name);
    544 			ret = 0;
    545 		}
    546 	}
    547 
    548 	backup_remove(vg->cmd, vg->name);
    549 
    550 	if (ret)
    551 		log_print("Volume group \"%s\" successfully removed", vg->name);
    552 	else
    553 		log_error("Volume group \"%s\" not properly removed", vg->name);
    554 
    555 	unlock_vg(vg->cmd, VG_ORPHANS);
    556 	return ret;
    557 }
    558 
    559 /*
    560  * Extend a VG by a single PV / device path
    561  *
    562  * Parameters:
    563  * - vg: handle of volume group to extend by 'pv_name'
    564  * - pv_name: device path of PV to add to VG
    565  * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
    566  *
    567  */
    568 static int vg_extend_single_pv(struct volume_group *vg, char *pv_name,
    569 			       struct pvcreate_params *pp)
    570 {
    571 	struct physical_volume *pv;
    572 
    573 	pv = pv_by_path(vg->fid->fmt->cmd, pv_name);
    574 	if (!pv && !pp) {
    575 		log_error("%s not identified as an existing "
    576 			  "physical volume", pv_name);
    577 		return 0;
    578 	} else if (!pv && pp) {
    579 		pv = pvcreate_single(vg->cmd, pv_name, pp);
    580 		if (!pv)
    581 			return 0;
    582 	}
    583 	if (!add_pv_to_vg(vg, pv_name, pv))
    584 		return 0;
    585 	return 1;
    586 }
    587 
    588 /*
    589  * Extend a VG by a single PV / device path
    590  *
    591  * Parameters:
    592  * - vg: handle of volume group to extend by 'pv_name'
    593  * - pv_count: count of device paths of PVs
    594  * - pv_names: device paths of PVs to add to VG
    595  * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
    596  *
    597  */
    598 int vg_extend(struct volume_group *vg, int pv_count, char **pv_names,
    599 	      struct pvcreate_params *pp)
    600 {
    601 	int i;
    602 
    603 	if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
    604 		return 0;
    605 
    606 	/* attach each pv */
    607 	for (i = 0; i < pv_count; i++) {
    608 		if (!vg_extend_single_pv(vg, pv_names[i], pp))
    609 			goto bad;
    610 	}
    611 
    612 /* FIXME Decide whether to initialise and add new mdahs to format instance */
    613 
    614 	return 1;
    615 
    616       bad:
    617 	log_error("Unable to add physical volume '%s' to "
    618 		  "volume group '%s'.", pv_names[i], vg->name);
    619 	return 0;
    620 }
    621 
    622 /* FIXME: use this inside vgreduce_single? */
    623 int vg_reduce(struct volume_group *vg, char *pv_name)
    624 {
    625 	struct physical_volume *pv;
    626 	struct pv_list *pvl;
    627 
    628 	if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
    629 		return 0;
    630 
    631 	if (!archive(vg))
    632 		goto bad;
    633 
    634 	/* remove each pv */
    635 	if (!(pvl = find_pv_in_vg(vg, pv_name))) {
    636 		log_error("Physical volume %s not in volume group %s.",
    637 			  pv_name, vg->name);
    638 		goto bad;
    639 	}
    640 
    641 	pv = pvl->pv;
    642 
    643 	if (pv_pe_alloc_count(pv)) {
    644 		log_error("Physical volume %s still in use.",
    645 			  pv_name);
    646 		goto bad;
    647 	}
    648 
    649 	if (!dev_get_size(pv_dev(pv), &pv->size)) {
    650 		log_error("%s: Couldn't get size.", pv_name);
    651 		goto bad;
    652 	}
    653 
    654 	vg->pv_count--;
    655 	vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
    656 	vg->extent_count -= pv_pe_count(pv);
    657 
    658 	/* add pv to the remove_pvs list */
    659 	dm_list_del(&pvl->list);
    660 	dm_list_add(&vg->removed_pvs, &pvl->list);
    661 
    662 	return 1;
    663 
    664       bad:
    665 	log_error("Unable to remove physical volume '%s' from "
    666 		  "volume group '%s'.", pv_name, vg->name);
    667 	return 0;
    668 }
    669 
/*
 * Skip a leading 'dev_dir' in 'vg_name'.
 *
 * Returns a pointer into 'vg_name': just past the prefix when 'vg_name'
 * begins with 'dev_dir', otherwise 'vg_name' itself.
 */
const char *strip_dir(const char *vg_name, const char *dev_dir)
{
	const size_t prefix_len = strlen(dev_dir);

	if (strncmp(vg_name, dev_dir, prefix_len) != 0)
		return vg_name;

	return vg_name + prefix_len;
}
    678 
    679 /*
    680  * Validate parameters to vg_create() before calling.
    681  * FIXME: Move inside vg_create library function.
    682  * FIXME: Change vgcreate_params struct to individual gets/sets
    683  */
    684 int vgcreate_params_validate(struct cmd_context *cmd,
    685 			     struct vgcreate_params *vp)
    686 {
    687 	if (!validate_new_vg_name(cmd, vp->vg_name)) {
    688 		log_error("New volume group name \"%s\" is invalid",
    689 			  vp->vg_name);
    690 		return 1;
    691 	}
    692 
    693 	if (vp->alloc == ALLOC_INHERIT) {
    694 		log_error("Volume Group allocation policy cannot inherit "
    695 			  "from anything");
    696 		return 1;
    697 	}
    698 
    699 	if (!vp->extent_size) {
    700 		log_error("Physical extent size may not be zero");
    701 		return 1;
    702 	}
    703 
    704 	if (!(cmd->fmt->features & FMT_UNLIMITED_VOLS)) {
    705 		if (!vp->max_lv)
    706 			vp->max_lv = 255;
    707 		if (!vp->max_pv)
    708 			vp->max_pv = 255;
    709 		if (vp->max_lv > 255 || vp->max_pv > 255) {
    710 			log_error("Number of volumes may not exceed 255");
    711 			return 1;
    712 		}
    713 	}
    714 
    715 	return 0;
    716 }
    717 
    718 /*
    719  * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a
    720  * possible failure code or zero for success.
    721  */
    722 static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
    723 			     struct volume_group *vg,
    724 			     uint32_t failure)
    725 {
    726 	struct dm_pool *vgmem;
    727 
    728 	if (!vg) {
    729 		if (!(vgmem = dm_pool_create("lvm2 vg_handle", VG_MEMPOOL_CHUNK)) ||
    730 		    !(vg = dm_pool_zalloc(vgmem, sizeof(*vg)))) {
    731 			log_error("Error allocating vg handle.");
    732 			if (vgmem)
    733 				dm_pool_destroy(vgmem);
    734 			return_NULL;
    735 		}
    736 		vg->vgmem = vgmem;
    737 	}
    738 
    739 	vg->read_status = failure;
    740 
    741 	return (struct volume_group *)vg;
    742 }
    743 
    744 int lv_has_unknown_segments(const struct logical_volume *lv)
    745 {
    746 	struct lv_segment *seg;
    747 	/* foreach segment */
    748 	dm_list_iterate_items(seg, &lv->segments)
    749 		if (seg_unknown(seg))
    750 			return 1;
    751 	return 0;
    752 }
    753 
    754 int vg_has_unknown_segments(const struct volume_group *vg)
    755 {
    756 	struct lv_list *lvl;
    757 
    758 	/* foreach LV */
    759 	dm_list_iterate_items(lvl, &vg->lvs)
    760 		if (lv_has_unknown_segments(lvl->lv))
    761 			return 1;
    762 	return 0;
    763 }
    764 
    765 /*
    766  * Create a VG with default parameters.
    767  * Returns:
    768  * - struct volume_group* with SUCCESS code: VG structure created
    769  * - NULL or struct volume_group* with FAILED_* code: error creating VG structure
    770  * Use vg_read_error() to determine success or failure.
    771  * FIXME: cleanup usage of _vg_make_handle()
    772  */
    773 struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
    774 {
    775 	struct volume_group *vg;
    776 	int consistent = 0;
    777 	struct dm_pool *mem;
    778 	uint32_t rc;
    779 
    780 	if (!validate_name(vg_name)) {
    781 		log_error("Invalid vg name %s", vg_name);
    782 		/* FIXME: use _vg_make_handle() w/proper error code */
    783 		return NULL;
    784 	}
    785 
    786 	rc = vg_lock_newname(cmd, vg_name);
    787 	if (rc != SUCCESS)
    788 		/* NOTE: let caller decide - this may be check for existence */
    789 		return _vg_make_handle(cmd, NULL, rc);
    790 
    791 	/* FIXME: Is this vg_read_internal necessary? Move it inside
    792 	   vg_lock_newname? */
    793 	/* is this vg name already in use ? */
    794 	if ((vg = vg_read_internal(cmd, vg_name, NULL, &consistent))) {
    795 		log_error("A volume group called '%s' already exists.", vg_name);
    796 		unlock_and_release_vg(cmd, vg, vg_name);
    797 		return _vg_make_handle(cmd, NULL, FAILED_EXIST);
    798 	}
    799 
    800 	if (!(mem = dm_pool_create("lvm2 vg_create", VG_MEMPOOL_CHUNK)))
    801 		goto_bad;
    802 
    803 	if (!(vg = dm_pool_zalloc(mem, sizeof(*vg))))
    804 		goto_bad;
    805 
    806 	if (!id_create(&vg->id)) {
    807 		log_error("Couldn't create uuid for volume group '%s'.",
    808 			  vg_name);
    809 		goto bad;
    810 	}
    811 
    812 	/* Strip dev_dir if present */
    813 	vg_name = strip_dir(vg_name, cmd->dev_dir);
    814 
    815 	vg->vgmem = mem;
    816 	vg->cmd = cmd;
    817 
    818 	if (!(vg->name = dm_pool_strdup(mem, vg_name)))
    819 		goto_bad;
    820 
    821 	vg->seqno = 0;
    822 
    823 	vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
    824 	if (!(vg->system_id = dm_pool_alloc(mem, NAME_LEN)))
    825 		goto_bad;
    826 
    827 	*vg->system_id = '\0';
    828 
    829 	vg->extent_size = DEFAULT_EXTENT_SIZE * 2;
    830 	vg->extent_count = 0;
    831 	vg->free_count = 0;
    832 
    833 	vg->max_lv = DEFAULT_MAX_LV;
    834 	vg->max_pv = DEFAULT_MAX_PV;
    835 
    836 	vg->alloc = DEFAULT_ALLOC_POLICY;
    837 
    838 	vg->pv_count = 0;
    839 	dm_list_init(&vg->pvs);
    840 
    841 	dm_list_init(&vg->lvs);
    842 
    843 	dm_list_init(&vg->tags);
    844 
    845 	/* initialize removed_pvs list */
    846 	dm_list_init(&vg->removed_pvs);
    847 
    848 	if (!(vg->fid = cmd->fmt->ops->create_instance(cmd->fmt, vg_name,
    849 						       NULL, NULL))) {
    850 		log_error("Failed to create format instance");
    851 		goto bad;
    852 	}
    853 
    854 	if (vg->fid->fmt->ops->vg_setup &&
    855 	    !vg->fid->fmt->ops->vg_setup(vg->fid, vg)) {
    856 		log_error("Format specific setup of volume group '%s' failed.",
    857 			  vg_name);
    858 		goto bad;
    859 	}
    860 	return _vg_make_handle(cmd, vg, SUCCESS);
    861 
    862 bad:
    863 	unlock_and_release_vg(cmd, vg, vg_name);
    864 	/* FIXME: use _vg_make_handle() w/proper error code */
    865 	return NULL;
    866 }
    867 
/*
 * Convert 'size' into a number of extents of 'extent_size'.
 *
 * A size that is not a whole number of extents is rounded up to the next
 * extent boundary (and the rounded size is printed).
 *
 * Returns the extent count, or 0 (after logging an error) when the size
 * would need more than UINT32_MAX extents.
 */
uint64_t extents_from_size(struct cmd_context *cmd, uint64_t size,
			   uint32_t extent_size)
{
	const uint64_t limit = (uint64_t) UINT32_MAX * extent_size;
	const uint64_t remainder = size % extent_size;

	if (remainder) {
		size += extent_size - remainder;
		log_print("Rounding up size to full physical extent %s",
			  display_size(cmd, size));
	}

	if (size > limit) {
		log_error("Volume too large (%s) for extent size %s. "
			  "Upper limit is %s.",
			  display_size(cmd, size),
			  display_size(cmd, (uint64_t) extent_size),
			  display_size(cmd, limit));
		return 0;
	}

	return size / extent_size;
}
    889 
/*
 * Re-express *extents, counted in units of 'old_size', in units of
 * 'new_size'.
 *
 * 'desc1' and 'desc2' are only used to name the quantity in error
 * messages.  *extents is left unchanged on failure.
 *
 * Returns 1 on success, 0 if the total is not an exact multiple of
 * 'new_size' or the new count does not fit in 32 bits.
 */
static int _recalc_extents(uint32_t *extents, const char *desc1,
			   const char *desc2, uint32_t old_size,
			   uint32_t new_size)
{
	const uint64_t total = (uint64_t) old_size * (*extents);
	uint64_t count;

	if (total % new_size) {
		log_error("New size %" PRIu64 " for %s%s not an exact number "
			  "of new extents.", total, desc1, desc2);
		return 0;
	}

	count = total / new_size;
	if (count > UINT32_MAX) {
		log_error("New extent count %" PRIu64 " for %s%s exceeds "
			  "32 bits.", count, desc1, desc2);
		return 0;
	}

	*extents = (uint32_t) count;

	return 1;
}
    914 
    915 int vg_set_extent_size(struct volume_group *vg, uint32_t new_size)
    916 {
    917 	uint32_t old_size = vg->extent_size;
    918 	struct pv_list *pvl;
    919 	struct lv_list *lvl;
    920 	struct physical_volume *pv;
    921 	struct logical_volume *lv;
    922 	struct lv_segment *seg;
    923 	struct pv_segment *pvseg;
    924 	uint32_t s;
    925 
    926 	if (!vg_is_resizeable(vg)) {
    927 		log_error("Volume group \"%s\" must be resizeable "
    928 			  "to change PE size", vg->name);
    929 		return 0;
    930 	}
    931 
    932 	if (!new_size) {
    933 		log_error("Physical extent size may not be zero");
    934 		return 0;
    935 	}
    936 
    937 	if (new_size == vg->extent_size)
    938 		return 1;
    939 
    940 	if (new_size & (new_size - 1)) {
    941 		log_error("Physical extent size must be a power of 2.");
    942 		return 0;
    943 	}
    944 
    945 	if (new_size > vg->extent_size) {
    946 		if ((uint64_t) vg_size(vg) % new_size) {
    947 			/* FIXME Adjust used PV sizes instead */
    948 			log_error("New extent size is not a perfect fit");
    949 			return 0;
    950 		}
    951 	}
    952 
    953 	vg->extent_size = new_size;
    954 
    955 	if (vg->fid->fmt->ops->vg_setup &&
    956 	    !vg->fid->fmt->ops->vg_setup(vg->fid, vg))
    957 		return_0;
    958 
    959 	if (!_recalc_extents(&vg->extent_count, vg->name, "", old_size,
    960 			     new_size))
    961 		return_0;
    962 
    963 	if (!_recalc_extents(&vg->free_count, vg->name, " free space",
    964 			     old_size, new_size))
    965 		return_0;
    966 
    967 	/* foreach PV */
    968 	dm_list_iterate_items(pvl, &vg->pvs) {
    969 		pv = pvl->pv;
    970 
    971 		pv->pe_size = new_size;
    972 		if (!_recalc_extents(&pv->pe_count, pv_dev_name(pv), "",
    973 				     old_size, new_size))
    974 			return_0;
    975 
    976 		if (!_recalc_extents(&pv->pe_alloc_count, pv_dev_name(pv),
    977 				     " allocated space", old_size, new_size))
    978 			return_0;
    979 
    980 		/* foreach free PV Segment */
    981 		dm_list_iterate_items(pvseg, &pv->segments) {
    982 			if (pvseg_is_allocated(pvseg))
    983 				continue;
    984 
    985 			if (!_recalc_extents(&pvseg->pe, pv_dev_name(pv),
    986 					     " PV segment start", old_size,
    987 					     new_size))
    988 				return_0;
    989 			if (!_recalc_extents(&pvseg->len, pv_dev_name(pv),
    990 					     " PV segment length", old_size,
    991 					     new_size))
    992 				return_0;
    993 		}
    994 	}
    995 
    996 	/* foreach LV */
    997 	dm_list_iterate_items(lvl, &vg->lvs) {
    998 		lv = lvl->lv;
    999 
   1000 		if (!_recalc_extents(&lv->le_count, lv->name, "", old_size,
   1001 				     new_size))
   1002 			return_0;
   1003 
   1004 		dm_list_iterate_items(seg, &lv->segments) {
   1005 			if (!_recalc_extents(&seg->le, lv->name,
   1006 					     " segment start", old_size,
   1007 					     new_size))
   1008 				return_0;
   1009 
   1010 			if (!_recalc_extents(&seg->len, lv->name,
   1011 					     " segment length", old_size,
   1012 					     new_size))
   1013 				return_0;
   1014 
   1015 			if (!_recalc_extents(&seg->area_len, lv->name,
   1016 					     " area length", old_size,
   1017 					     new_size))
   1018 				return_0;
   1019 
   1020 			if (!_recalc_extents(&seg->extents_copied, lv->name,
   1021 					     " extents moved", old_size,
   1022 					     new_size))
   1023 				return_0;
   1024 
   1025 			/* foreach area */
   1026 			for (s = 0; s < seg->area_count; s++) {
   1027 				switch (seg_type(seg, s)) {
   1028 				case AREA_PV:
   1029 					if (!_recalc_extents
   1030 					    (&seg_pe(seg, s),
   1031 					     lv->name,
   1032 					     " pvseg start", old_size,
   1033 					     new_size))
   1034 						return_0;
   1035 					if (!_recalc_extents
   1036 					    (&seg_pvseg(seg, s)->len,
   1037 					     lv->name,
   1038 					     " pvseg length", old_size,
   1039 					     new_size))
   1040 						return_0;
   1041 					break;
   1042 				case AREA_LV:
   1043 					if (!_recalc_extents
   1044 					    (&seg_le(seg, s), lv->name,
   1045 					     " area start", old_size,
   1046 					     new_size))
   1047 						return_0;
   1048 					break;
   1049 				case AREA_UNASSIGNED:
   1050 					log_error("Unassigned area %u found in "
   1051 						  "segment", s);
   1052 					return 0;
   1053 				}
   1054 			}
   1055 		}
   1056 
   1057 	}
   1058 
   1059 	return 1;
   1060 }
   1061 
   1062 int vg_set_max_lv(struct volume_group *vg, uint32_t max_lv)
   1063 {
   1064 	if (!vg_is_resizeable(vg)) {
   1065 		log_error("Volume group \"%s\" must be resizeable "
   1066 			  "to change MaxLogicalVolume", vg->name);
   1067 		return 0;
   1068 	}
   1069 
   1070 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
   1071 		if (!max_lv)
   1072 			max_lv = 255;
   1073 		else if (max_lv > 255) {
   1074 			log_error("MaxLogicalVolume limit is 255");
   1075 			return 0;
   1076 		}
   1077 	}
   1078 
   1079 	if (max_lv && max_lv < vg_visible_lvs(vg)) {
   1080 		log_error("MaxLogicalVolume is less than the current number "
   1081 			  "%d of LVs for %s", vg_visible_lvs(vg),
   1082 			  vg->name);
   1083 		return 0;
   1084 	}
   1085 	vg->max_lv = max_lv;
   1086 
   1087 	return 1;
   1088 }
   1089 
   1090 int vg_set_max_pv(struct volume_group *vg, uint32_t max_pv)
   1091 {
   1092 	if (!vg_is_resizeable(vg)) {
   1093 		log_error("Volume group \"%s\" must be resizeable "
   1094 			  "to change MaxPhysicalVolumes", vg->name);
   1095 		return 0;
   1096 	}
   1097 
   1098 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
   1099 		if (!max_pv)
   1100 			max_pv = 255;
   1101 		else if (max_pv > 255) {
   1102 			log_error("MaxPhysicalVolume limit is 255");
   1103 			return 0;
   1104 		}
   1105 	}
   1106 
   1107 	if (max_pv && max_pv < vg->pv_count) {
   1108 		log_error("MaxPhysicalVolumes is less than the current number "
   1109 			  "%d of PVs for \"%s\"", vg->pv_count,
   1110 			  vg->name);
   1111 		return 0;
   1112 	}
   1113 	vg->max_pv = max_pv;
   1114 	return 1;
   1115 }
   1116 
   1117 int vg_set_alloc_policy(struct volume_group *vg, alloc_policy_t alloc)
   1118 {
   1119 	if (alloc == ALLOC_INHERIT) {
   1120 		log_error("Volume Group allocation policy cannot inherit "
   1121 			  "from anything");
   1122 		return 0;
   1123 	}
   1124 
   1125 	if (alloc == vg->alloc)
   1126 		return 1;
   1127 
   1128 	vg->alloc = alloc;
   1129 	return 1;
   1130 }
   1131 
   1132 int vg_set_clustered(struct volume_group *vg, int clustered)
   1133 {
   1134 	struct lv_list *lvl;
   1135 	if (clustered) {
   1136 		dm_list_iterate_items(lvl, &vg->lvs) {
   1137 			if (lv_is_origin(lvl->lv) || lv_is_cow(lvl->lv)) {
   1138 				log_error("Volume group %s contains snapshots "
   1139 					  "that are not yet supported.",
   1140 					  vg->name);
   1141 				return 0;
   1142 			}
   1143 		}
   1144 	}
   1145 
   1146 	if (clustered)
   1147 		vg->status |= CLUSTERED;
   1148 	else
   1149 		vg->status &= ~CLUSTERED;
   1150 	return 1;
   1151 }
   1152 
   1153 /*
   1154  * Separate metadata areas after splitting a VG.
   1155  * Also accepts orphan VG as destination (for vgreduce).
   1156  */
   1157 int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
   1158 		  struct volume_group *vg_from, struct volume_group *vg_to)
   1159 {
   1160 	struct metadata_area *mda, *mda2;
   1161 	struct dm_list *mdas_from, *mdas_to;
   1162 	int common_mda = 0;
   1163 
   1164 	mdas_from = &vg_from->fid->metadata_areas;
   1165 	mdas_to = &vg_to->fid->metadata_areas;
   1166 
   1167 	dm_list_iterate_items_safe(mda, mda2, mdas_from) {
   1168 		if (!mda->ops->mda_in_vg) {
   1169 			common_mda = 1;
   1170 			continue;
   1171 		}
   1172 
   1173 		if (!mda->ops->mda_in_vg(vg_from->fid, vg_from, mda)) {
   1174 			if (is_orphan_vg(vg_to->name))
   1175 				dm_list_del(&mda->list);
   1176 			else
   1177 				dm_list_move(mdas_to, &mda->list);
   1178 		}
   1179 	}
   1180 
   1181 	if (dm_list_empty(mdas_from) ||
   1182 	    (!is_orphan_vg(vg_to->name) && dm_list_empty(mdas_to)))
   1183 		return common_mda;
   1184 
   1185 	return 1;
   1186 }
   1187 
/*
 * See if we may pvcreate on this device.
 * 0 indicates we may not.
 *
 * Parameters:
 * - cmd:  command context; its filter is used for device lookups
 * - name: device path to check
 * - pp:   pvcreate parameters; force, yes, idp and restorefile are read
 *
 * The checks run in this order:
 *   1. refuse a PV that belongs to a VG unless pp->force is
 *      DONT_PROMPT_OVERRIDE, then ask for confirmation unless pp->yes;
 *   2. look the device up, retrying once with md filtering disabled;
 *   3. require that the device can be opened exclusively;
 *   4. detect and optionally wipe an md superblock and a swap signature.
 *
 * Returns 1 if the device may be initialised, 0 otherwise.
 */
static int pvcreate_check(struct cmd_context *cmd, const char *name,
			  struct pvcreate_params *pp)
{
	struct physical_volume *pv;
	struct device *dev;
	uint64_t md_superblock, swap_signature;
	int wipe_md, wipe_swap;

	/* FIXME Check partition type is LVM unless --force is given */

	/* Is there a pv here already? */
	pv = pv_read(cmd, name, NULL, NULL, 0, 0);

	/*
	 * If a PV has no MDAs it may appear to be an orphan until the
	 * metadata is read off another PV in the same VG.  Detecting
	 * this means checking every VG by scanning every PV on the
	 * system.
	 */
	if (pv && is_orphan(pv)) {
		if (!scan_vgs_for_pvs(cmd))
			return_0;
		/* Re-read: the scan may have associated the PV with a VG. */
		pv = pv_read(cmd, name, NULL, NULL, 0, 0);
	}

	/* Allow partial & exported VGs to be destroyed. */
	/* We must have -ff to overwrite a non orphan */
	if (pv && !is_orphan(pv) && pp->force != DONT_PROMPT_OVERRIDE) {
		log_error("Can't initialize physical volume \"%s\" of "
			  "volume group \"%s\" without -ff", name, pv_vg_name(pv));
		return 0;
	}

	/* prompt */
	if (pv && !is_orphan(pv) && !pp->yes &&
	    yes_no_prompt(_really_init, name, pv_vg_name(pv)) == 'n') {
		log_print("%s: physical volume not initialized", name);
		return 0;
	}

	/* The user may have interrupted the prompt. */
	if (sigint_caught())
		return 0;

	dev = dev_cache_get(name, cmd->filter);

	/* Is there an md superblock here? */
	if (!dev && md_filtering()) {
		/*
		 * The device may have been hidden by md filtering.  Drop the
		 * orphan lock and the caches, then look the device up again
		 * with md filtering switched off.  Filtering is switched back
		 * on on every path out of this branch.
		 */
		unlock_vg(cmd, VG_ORPHANS);

		persistent_filter_wipe(cmd->filter);
		lvmcache_destroy(cmd, 1);

		init_md_filtering(0);
		if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE)) {
			log_error("Can't get lock for orphan PVs");
			init_md_filtering(1);
			return 0;
		}
		dev = dev_cache_get(name, cmd->filter);
		init_md_filtering(1);
	}

	if (!dev) {
		log_error("Device %s not found (or ignored by filtering).", name);
		return 0;
	}

	/*
	 * This test will fail if the device belongs to an MD array.
	 */
	if (!dev_test_excl(dev)) {
		/* FIXME Detect whether device-mapper itself is still using it */
		log_error("Can't open %s exclusively.  Mounted filesystem?",
			  name);
		return 0;
	}

	/*
	 * Wipe superblock?
	 * The wipe goes ahead without asking when neither a uuid (idp) nor a
	 * restore file was given, or when pp->yes is set; otherwise the user
	 * is prompted.
	 * NOTE(review): dev_set(dev, offset, 4, 0) presumably zeroes 4 bytes
	 * at the reported superblock offset - confirm against dev_set().
	 */
	if ((wipe_md = dev_is_md(dev, &md_superblock)) == 1 &&
	    ((!pp->idp && !pp->restorefile) || pp->yes ||
	     (yes_no_prompt("Software RAID md superblock "
			    "detected on %s. Wipe it? [y/n] ", name) == 'y'))) {
		log_print("Wiping software RAID md superblock on %s", name);
		if (!dev_set(dev, md_superblock, 4, 0)) {
			log_error("Failed to wipe RAID md superblock on %s",
				  name);
			return 0;
		}
	}

	/* -1 from dev_is_md() is treated as a detection failure. */
	if (wipe_md == -1) {
		log_error("Fatal error while trying to detect software "
			  "RAID md superblock on %s", name);
		return 0;
	}

	/* Same policy as for the md superblock, applied to swap signatures. */
	if ((wipe_swap = dev_is_swap(dev, &swap_signature)) == 1 &&
	    ((!pp->idp && !pp->restorefile) || pp->yes ||
	     (yes_no_prompt("Swap signature detected on %s. Wipe it? [y/n] ",
			    name) == 'y'))) {
		log_print("Wiping swap signature on %s", name);
		if (!dev_set(dev, swap_signature, 10, 0)) {
			log_error("Failed to wipe swap signature on %s", name);
			return 0;
		}
	}

	/* -1 from dev_is_swap() is treated as a detection failure. */
	if (wipe_swap == -1) {
		log_error("Fatal error while trying to detect swap "
			  "signature on %s", name);
		return 0;
	}

	if (sigint_caught())
		return 0;

	/*
	 * Only reached for a non-orphan PV with a non-zero force level, so
	 * the !is_orphan(pv) tests inside the argument list are always true.
	 */
	if (pv && !is_orphan(pv) && pp->force) {
		log_warn("WARNING: Forcing physical volume creation on "
			  "%s%s%s%s", name,
			  !is_orphan(pv) ? " of volume group \"" : "",
			  !is_orphan(pv) ? pv_vg_name(pv) : "",
			  !is_orphan(pv) ? "\"" : "");
	}

	return 1;
}
   1318 
   1319 void pvcreate_params_set_defaults(struct pvcreate_params *pp)
   1320 {
   1321 	memset(pp, 0, sizeof(*pp));
   1322 	pp->zero = 1;
   1323 	pp->size = 0;
   1324 	pp->data_alignment = UINT64_C(0);
   1325 	pp->data_alignment_offset = UINT64_C(0);
   1326 	pp->pvmetadatacopies = DEFAULT_PVMETADATACOPIES;
   1327 	pp->pvmetadatasize = DEFAULT_PVMETADATASIZE;
   1328 	pp->labelsector = DEFAULT_LABELSECTOR;
   1329 	pp->idp = 0;
   1330 	pp->pe_start = 0;
   1331 	pp->extent_count = 0;
   1332 	pp->extent_size = 0;
   1333 	pp->restorefile = 0;
   1334 	pp->force = PROMPT;
   1335 	pp->yes = 0;
   1336 }
   1337 
   1338 /*
   1339  * pvcreate_single() - initialize a device with PV label and metadata area
   1340  *
   1341  * Parameters:
   1342  * - pv_name: device path to initialize
   1343  * - pp: parameters to pass to pv_create; if NULL, use default values
   1344  *
   1345  * Returns:
   1346  * NULL: error
   1347  * struct physical_volume * (non-NULL): handle to physical volume created
   1348  */
   1349 struct physical_volume * pvcreate_single(struct cmd_context *cmd,
   1350 					 const char *pv_name,
   1351 					 struct pvcreate_params *pp)
   1352 {
   1353 	void *pv;
   1354 	struct device *dev;
   1355 	struct dm_list mdas;
   1356 	struct pvcreate_params default_pp;
   1357 	char buffer[64] __attribute((aligned(8)));
   1358 
   1359 	pvcreate_params_set_defaults(&default_pp);
   1360 	if (!pp)
   1361 		pp = &default_pp;
   1362 
   1363 	if (pp->idp) {
   1364 		if ((dev = device_from_pvid(cmd, pp->idp)) &&
   1365 		    (dev != dev_cache_get(pv_name, cmd->filter))) {
   1366 			if (!id_write_format((const struct id*)&pp->idp->uuid,
   1367 			    buffer, sizeof(buffer)))
   1368 				return_NULL;
   1369 			log_error("uuid %s already in use on \"%s\"", buffer,
   1370 				  dev_name(dev));
   1371 			return NULL;
   1372 		}
   1373 	}
   1374 
   1375 	if (!pvcreate_check(cmd, pv_name, pp))
   1376 		goto error;
   1377 
   1378 	if (sigint_caught())
   1379 		goto error;
   1380 
   1381 	if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
   1382 		log_error("%s: Couldn't find device.  Check your filters?",
   1383 			  pv_name);
   1384 		goto error;
   1385 	}
   1386 
   1387 	dm_list_init(&mdas);
   1388 	if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
   1389 			     pp->data_alignment, pp->data_alignment_offset,
   1390 			     pp->pe_start, pp->extent_count, pp->extent_size,
   1391 			     pp->pvmetadatacopies,
   1392 			     pp->pvmetadatasize,&mdas))) {
   1393 		log_error("Failed to setup physical volume \"%s\"", pv_name);
   1394 		goto error;
   1395 	}
   1396 
   1397 	log_verbose("Set up physical volume for \"%s\" with %" PRIu64
   1398 		    " available sectors", pv_name, pv_size(pv));
   1399 
   1400 	/* Wipe existing label first */
   1401 	if (!label_remove(pv_dev(pv))) {
   1402 		log_error("Failed to wipe existing label on %s", pv_name);
   1403 		goto error;
   1404 	}
   1405 
   1406 	if (pp->zero) {
   1407 		log_verbose("Zeroing start of device %s", pv_name);
   1408 		if (!dev_open_quiet(dev)) {
   1409 			log_error("%s not opened: device not zeroed", pv_name);
   1410 			goto error;
   1411 		}
   1412 
   1413 		if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
   1414 			log_error("%s not wiped: aborting", pv_name);
   1415 			dev_close(dev);
   1416 			goto error;
   1417 		}
   1418 		dev_close(dev);
   1419 	}
   1420 
   1421 	log_very_verbose("Writing physical volume data to disk \"%s\"",
   1422 			 pv_name);
   1423 	if (!(pv_write(cmd, (struct physical_volume *)pv, &mdas,
   1424 		       pp->labelsector))) {
   1425 		log_error("Failed to write physical volume \"%s\"", pv_name);
   1426 		goto error;
   1427 	}
   1428 
   1429 	log_print("Physical volume \"%s\" successfully created", pv_name);
   1430 
   1431 	return pv;
   1432 
   1433       error:
   1434 	return NULL;
   1435 }
   1436 
/*
 * Release a PV structure by handing it back to the pool it was
 * allocated from.
 */
static void _free_pv(struct dm_pool *mem, struct physical_volume *pv)
{
	dm_pool_free(mem, pv);
}
   1441 
   1442 static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
   1443 {
   1444 	struct physical_volume *pv = dm_pool_zalloc(mem, sizeof(*pv));
   1445 
   1446 	if (!pv)
   1447 		return_NULL;
   1448 
   1449 	if (!(pv->vg_name = dm_pool_zalloc(mem, NAME_LEN))) {
   1450 		dm_pool_free(mem, pv);
   1451 		return NULL;
   1452 	}
   1453 
   1454 	pv->pe_size = 0;
   1455 	pv->pe_start = 0;
   1456 	pv->pe_count = 0;
   1457 	pv->pe_alloc_count = 0;
   1458 	pv->pe_align = 0;
   1459 	pv->pe_align_offset = 0;
   1460 	pv->fmt = NULL;
   1461 	pv->dev = dev;
   1462 
   1463 	pv->status = ALLOCATABLE_PV;
   1464 
   1465 	dm_list_init(&pv->tags);
   1466 	dm_list_init(&pv->segments);
   1467 
   1468 	return pv;
   1469 }
   1470 
   1471 /**
   1472  * pv_create - initialize a physical volume for use with a volume group
   1473  *
   1474  * @fmt: format type
   1475  * @dev: PV device to initialize
   1476  * @size: size of the PV in sectors
   1477  * @data_alignment: requested alignment of data
   1478  * @data_alignment_offset: requested offset to aligned data
   1479  * @pe_start: physical extent start
   1480  * @existing_extent_count
   1481  * @existing_extent_size
   1482  * @pvmetadatacopies
   1483  * @pvmetadatasize
   1484  * @mdas
   1485  *
   1486  * Returns:
   1487  *   PV handle - physical volume initialized successfully
   1488  *   NULL - invalid parameter or problem initializing the physical volume
   1489  *
   1490  * Note:
   1491  *   FIXME: shorten argument list and replace with explict 'set' functions
   1492  */
   1493 struct physical_volume *pv_create(const struct cmd_context *cmd,
   1494 				  struct device *dev,
   1495 				  struct id *id, uint64_t size,
   1496 				  unsigned long data_alignment,
   1497 				  unsigned long data_alignment_offset,
   1498 				  uint64_t pe_start,
   1499 				  uint32_t existing_extent_count,
   1500 				  uint32_t existing_extent_size,
   1501 				  int pvmetadatacopies,
   1502 				  uint64_t pvmetadatasize, struct dm_list *mdas)
   1503 {
   1504 	const struct format_type *fmt = cmd->fmt;
   1505 	struct dm_pool *mem = fmt->cmd->mem;
   1506 	struct physical_volume *pv = _alloc_pv(mem, dev);
   1507 
   1508 	if (!pv)
   1509 		return NULL;
   1510 
   1511 	if (id)
   1512 		memcpy(&pv->id, id, sizeof(*id));
   1513 	else if (!id_create(&pv->id)) {
   1514 		log_error("Failed to create random uuid for %s.",
   1515 			  dev_name(dev));
   1516 		goto bad;
   1517 	}
   1518 
   1519 	if (!dev_get_size(pv->dev, &pv->size)) {
   1520 		log_error("%s: Couldn't get size.", pv_dev_name(pv));
   1521 		goto bad;
   1522 	}
   1523 
   1524 	if (size) {
   1525 		if (size > pv->size)
   1526 			log_warn("WARNING: %s: Overriding real size. "
   1527 				  "You could lose data.", pv_dev_name(pv));
   1528 		log_verbose("%s: Pretending size is %" PRIu64 " sectors.",
   1529 			    pv_dev_name(pv), size);
   1530 		pv->size = size;
   1531 	}
   1532 
   1533 	if (pv->size < PV_MIN_SIZE) {
   1534 		log_error("%s: Size must exceed minimum of %ld sectors.",
   1535 			  pv_dev_name(pv), PV_MIN_SIZE);
   1536 		goto bad;
   1537 	}
   1538 
   1539 	if (pv->size < data_alignment) {
   1540 		log_error("%s: Data alignment must not exceed device size.",
   1541 			  pv_dev_name(pv));
   1542 		goto bad;
   1543 	}
   1544 
   1545 	pv->fmt = fmt;
   1546 	pv->vg_name = fmt->orphan_vg_name;
   1547 
   1548 	if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
   1549 				existing_extent_size, data_alignment,
   1550 				data_alignment_offset,
   1551 				pvmetadatacopies, pvmetadatasize, mdas,
   1552 				pv, NULL)) {
   1553 		log_error("%s: Format-specific setup of physical volume "
   1554 			  "failed.", pv_dev_name(pv));
   1555 		goto bad;
   1556 	}
   1557 
   1558 	return pv;
   1559 
   1560       bad:
   1561 	_free_pv(mem, pv);
   1562 	return NULL;
   1563 }
   1564 
/*
 * Public entry point: look up the pv_list entry of a VG by PV device name.
 * Simply forwards to _find_pv_in_vg() and returns its result.
 */
/* FIXME: liblvm todo - make into function that returns handle */
struct pv_list *find_pv_in_vg(const struct volume_group *vg,
			      const char *pv_name)
{
	return _find_pv_in_vg(vg, pv_name);
}
   1571 
   1572 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
   1573 				      const char *pv_name)
   1574 {
   1575 	struct pv_list *pvl;
   1576 
   1577 	dm_list_iterate_items(pvl, &vg->pvs)
   1578 		if (pvl->pv->dev == dev_cache_get(pv_name, vg->cmd->filter))
   1579 			return pvl;
   1580 
   1581 	return NULL;
   1582 }
   1583 
   1584 struct pv_list *find_pv_in_pv_list(const struct dm_list *pl,
   1585 				   const struct physical_volume *pv)
   1586 {
   1587 	struct pv_list *pvl;
   1588 
   1589 	dm_list_iterate_items(pvl, pl)
   1590 		if (pvl->pv == pv)
   1591 			return pvl;
   1592 
   1593 	return NULL;
   1594 }
   1595 
   1596 int pv_is_in_vg(struct volume_group *vg, struct physical_volume *pv)
   1597 {
   1598 	struct pv_list *pvl;
   1599 
   1600 	dm_list_iterate_items(pvl, &vg->pvs)
   1601 		if (pv == pvl->pv)
   1602 			 return 1;
   1603 
   1604 	return 0;
   1605 }
   1606 
/**
 * find_pv_in_vg_by_uuid - Find PV in VG by PV UUID
 * @vg: volume group to search
 * @id: UUID of the PV to match
 *
 * Public wrapper that forwards to _find_pv_in_vg_by_uuid().
 *
 * Returns:
 *   PV handle - if UUID of PV found in VG
 *   NULL - UUID of PV not found in VG
 *
 * Note
 *   FIXME - liblvm todo - make into function that takes VG handle
 */
struct physical_volume *find_pv_in_vg_by_uuid(const struct volume_group *vg,
			    const struct id *id)
{
	return _find_pv_in_vg_by_uuid(vg, id);
}
   1624 
   1625 
   1626 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
   1627 						      const struct id *id)
   1628 {
   1629 	struct pv_list *pvl;
   1630 
   1631 	dm_list_iterate_items(pvl, &vg->pvs)
   1632 		if (id_equal(&pvl->pv->id, id))
   1633 			return pvl->pv;
   1634 
   1635 	return NULL;
   1636 }
   1637 
   1638 struct lv_list *find_lv_in_vg(const struct volume_group *vg,
   1639 			      const char *lv_name)
   1640 {
   1641 	struct lv_list *lvl;
   1642 	const char *ptr;
   1643 
   1644 	/* Use last component */
   1645 	if ((ptr = strrchr(lv_name, '/')))
   1646 		ptr++;
   1647 	else
   1648 		ptr = lv_name;
   1649 
   1650 	dm_list_iterate_items(lvl, &vg->lvs)
   1651 		if (!strcmp(lvl->lv->name, ptr))
   1652 			return lvl;
   1653 
   1654 	return NULL;
   1655 }
   1656 
   1657 struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
   1658 				   const struct logical_volume *lv)
   1659 {
   1660 	struct lv_list *lvl;
   1661 
   1662 	dm_list_iterate_items(lvl, ll)
   1663 		if (lvl->lv == lv)
   1664 			return lvl;
   1665 
   1666 	return NULL;
   1667 }
   1668 
   1669 struct lv_list *find_lv_in_vg_by_lvid(struct volume_group *vg,
   1670 				      const union lvid *lvid)
   1671 {
   1672 	struct lv_list *lvl;
   1673 
   1674 	dm_list_iterate_items(lvl, &vg->lvs)
   1675 		if (!strncmp(lvl->lv->lvid.s, lvid->s, sizeof(*lvid)))
   1676 			return lvl;
   1677 
   1678 	return NULL;
   1679 }
   1680 
   1681 struct logical_volume *find_lv(const struct volume_group *vg,
   1682 			       const char *lv_name)
   1683 {
   1684 	struct lv_list *lvl = find_lv_in_vg(vg, lv_name);
   1685 	return lvl ? lvl->lv : NULL;
   1686 }
   1687 
   1688 struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
   1689 {
   1690 	struct pv_list *pvl;
   1691 
   1692 	dm_list_iterate_items(pvl, &vg->pvs)
   1693 		if (dev == pvl->pv->dev)
   1694 			return pvl->pv;
   1695 
   1696 	return NULL;
   1697 }
   1698 
/*
 * Public entry point: look up a PV by device name.
 * Simply forwards to _find_pv_by_name() and returns its result.
 */
/* FIXME: liblvm todo - make into function that returns handle */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
					const char *pv_name)
{
	return _find_pv_by_name(cmd, pv_name);
}
   1705 
   1706 
   1707 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
   1708 			 			const char *pv_name)
   1709 {
   1710 	struct physical_volume *pv;
   1711 
   1712 	if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
   1713 		log_error("Physical volume %s not found", pv_name);
   1714 		return NULL;
   1715 	}
   1716 
   1717 	if (is_orphan_vg(pv->vg_name)) {
   1718 		/* If a PV has no MDAs - need to search all VGs for it */
   1719 		if (!scan_vgs_for_pvs(cmd))
   1720 			return_NULL;
   1721 		if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
   1722 			log_error("Physical volume %s not found", pv_name);
   1723 			return NULL;
   1724 		}
   1725 	}
   1726 
   1727 	if (is_orphan_vg(pv->vg_name)) {
   1728 		log_error("Physical volume %s not in a volume group", pv_name);
   1729 		return NULL;
   1730 	}
   1731 
   1732 	return pv;
   1733 }
   1734 
   1735 /* Find segment at a given logical extent in an LV */
   1736 struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
   1737 {
   1738 	struct lv_segment *seg;
   1739 
   1740 	dm_list_iterate_items(seg, &lv->segments)
   1741 		if (le >= seg->le && le < seg->le + seg->len)
   1742 			return seg;
   1743 
   1744 	return NULL;
   1745 }
   1746 
   1747 struct lv_segment *first_seg(const struct logical_volume *lv)
   1748 {
   1749 	struct lv_segment *seg;
   1750 
   1751 	dm_list_iterate_items(seg, &lv->segments)
   1752 		return seg;
   1753 
   1754 	return NULL;
   1755 }
   1756 
   1757 /* Find segment at a given physical extent in a PV */
   1758 struct pv_segment *find_peg_by_pe(const struct physical_volume *pv, uint32_t pe)
   1759 {
   1760 	struct pv_segment *peg;
   1761 
   1762 	dm_list_iterate_items(peg, &pv->segments)
   1763 		if (pe >= peg->pe && pe < peg->pe + peg->len)
   1764 			return peg;
   1765 
   1766 	return NULL;
   1767 }
   1768 
   1769 int vg_remove_mdas(struct volume_group *vg)
   1770 {
   1771 	struct metadata_area *mda;
   1772 
   1773 	/* FIXME Improve recovery situation? */
   1774 	/* Remove each copy of the metadata */
   1775 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
   1776 		if (mda->ops->vg_remove &&
   1777 		    !mda->ops->vg_remove(vg->fid, vg, mda))
   1778 			return_0;
   1779 	}
   1780 
   1781 	return 1;
   1782 }
   1783 
   1784 unsigned snapshot_count(const struct volume_group *vg)
   1785 {
   1786 	struct lv_list *lvl;
   1787 	unsigned num_snapshots = 0;
   1788 
   1789 	dm_list_iterate_items(lvl, &vg->lvs)
   1790 		if (lv_is_cow(lvl->lv))
   1791 			num_snapshots++;
   1792 
   1793 	return num_snapshots;
   1794 }
   1795 
   1796 unsigned vg_visible_lvs(const struct volume_group *vg)
   1797 {
   1798 	struct lv_list *lvl;
   1799 	unsigned lv_count = 0;
   1800 
   1801 	dm_list_iterate_items(lvl, &vg->lvs) {
   1802 		if (lv_is_visible(lvl->lv))
   1803 			lv_count++;
   1804 	}
   1805 
   1806 	return lv_count;
   1807 }
   1808 
   1809 /*
   1810  * Determine whether two vgs are compatible for merging.
   1811  */
   1812 int vgs_are_compatible(struct cmd_context *cmd __attribute((unused)),
   1813 		       struct volume_group *vg_from,
   1814 		       struct volume_group *vg_to)
   1815 {
   1816 	struct lv_list *lvl1, *lvl2;
   1817 	struct pv_list *pvl;
   1818 	char *name1, *name2;
   1819 
   1820 	if (lvs_in_vg_activated(vg_from)) {
   1821 		log_error("Logical volumes in \"%s\" must be inactive",
   1822 			  vg_from->name);
   1823 		return 0;
   1824 	}
   1825 
   1826 	/* Check compatibility */
   1827 	if (vg_to->extent_size != vg_from->extent_size) {
   1828 		log_error("Extent sizes differ: %d (%s) and %d (%s)",
   1829 			  vg_to->extent_size, vg_to->name,
   1830 			  vg_from->extent_size, vg_from->name);
   1831 		return 0;
   1832 	}
   1833 
   1834 	if (vg_to->max_pv &&
   1835 	    (vg_to->max_pv < vg_to->pv_count + vg_from->pv_count)) {
   1836 		log_error("Maximum number of physical volumes (%d) exceeded "
   1837 			  " for \"%s\" and \"%s\"", vg_to->max_pv, vg_to->name,
   1838 			  vg_from->name);
   1839 		return 0;
   1840 	}
   1841 
   1842 	if (vg_to->max_lv &&
   1843 	    (vg_to->max_lv < vg_visible_lvs(vg_to) + vg_visible_lvs(vg_from))) {
   1844 		log_error("Maximum number of logical volumes (%d) exceeded "
   1845 			  " for \"%s\" and \"%s\"", vg_to->max_lv, vg_to->name,
   1846 			  vg_from->name);
   1847 		return 0;
   1848 	}
   1849 
   1850 	/* Metadata types must be the same */
   1851 	if (vg_to->fid->fmt != vg_from->fid->fmt) {
   1852 		log_error("Metadata types differ for \"%s\" and \"%s\"",
   1853 			  vg_to->name, vg_from->name);
   1854 		return 0;
   1855 	}
   1856 
   1857 	/* Clustering attribute must be the same */
   1858 	if (vg_is_clustered(vg_to) != vg_is_clustered(vg_from)) {
   1859 		log_error("Clustered attribute differs for \"%s\" and \"%s\"",
   1860 			  vg_to->name, vg_from->name);
   1861 		return 0;
   1862 	}
   1863 
   1864 	/* Check no conflicts with LV names */
   1865 	dm_list_iterate_items(lvl1, &vg_to->lvs) {
   1866 		name1 = lvl1->lv->name;
   1867 
   1868 		dm_list_iterate_items(lvl2, &vg_from->lvs) {
   1869 			name2 = lvl2->lv->name;
   1870 
   1871 			if (!strcmp(name1, name2)) {
   1872 				log_error("Duplicate logical volume "
   1873 					  "name \"%s\" "
   1874 					  "in \"%s\" and \"%s\"",
   1875 					  name1, vg_to->name, vg_from->name);
   1876 				return 0;
   1877 			}
   1878 		}
   1879 	}
   1880 
   1881 	/* Check no PVs are constructed from either VG */
   1882 	dm_list_iterate_items(pvl, &vg_to->pvs) {
   1883 		if (pv_uses_vg(pvl->pv, vg_from)) {
   1884 			log_error("Physical volume %s might be constructed "
   1885 				  "from same volume group %s.",
   1886 				  pv_dev_name(pvl->pv), vg_from->name);
   1887 			return 0;
   1888 		}
   1889 	}
   1890 
   1891 	dm_list_iterate_items(pvl, &vg_from->pvs) {
   1892 		if (pv_uses_vg(pvl->pv, vg_to)) {
   1893 			log_error("Physical volume %s might be constructed "
   1894 				  "from same volume group %s.",
   1895 				  pv_dev_name(pvl->pv), vg_to->name);
   1896 			return 0;
   1897 		}
   1898 	}
   1899 
   1900 	return 1;
   1901 }
   1902 
/*
 * Bundles the user callback of a postorder walk with its opaque argument
 * so both can travel through the single void * accepted by
 * _lv_each_dependency().
 */
struct _lv_postorder_baton {
	int (*fn)(struct logical_volume *lv, void *data);	/* user callback */
	void *data;						/* passed to fn unchanged */
};

/*
 * Forward declaration: _lv_postorder_level() and _lv_postorder_visit()
 * call each other.
 */
static int _lv_postorder_visit(struct logical_volume *,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data);
   1911 
   1912 static int _lv_postorder_level(struct logical_volume *lv, void *data)
   1913 {
   1914 	struct _lv_postorder_baton *baton = data;
   1915 	if (lv->status & POSTORDER_OPEN_FLAG)
   1916 		return 1; // a data structure loop has closed...
   1917 	lv->status |= POSTORDER_OPEN_FLAG;
   1918 	int r =_lv_postorder_visit(lv, baton->fn, baton->data);
   1919 	lv->status &= ~POSTORDER_OPEN_FLAG;
   1920 	lv->status |= POSTORDER_FLAG;
   1921 	return r;
   1922 };
   1923 
   1924 static int _lv_each_dependency(struct logical_volume *lv,
   1925 			       int (*fn)(struct logical_volume *lv, void *data),
   1926 			       void *data)
   1927 {
   1928 	int i, s;
   1929 	struct lv_segment *lvseg;
   1930 
   1931 	struct logical_volume *deps[] = {
   1932 		lv->snapshot ? lv->snapshot->origin : 0,
   1933 		lv->snapshot ? lv->snapshot->cow : 0 };
   1934 	for (i = 0; i < sizeof(deps) / sizeof(*deps); ++i) {
   1935 		if (deps[i] && !fn(deps[i], data))
   1936 			return_0;
   1937 	}
   1938 
   1939 	dm_list_iterate_items(lvseg, &lv->segments) {
   1940 		if (lvseg->log_lv && !fn(lvseg->log_lv, data))
   1941 			return_0;
   1942 		for (s = 0; s < lvseg->area_count; ++s) {
   1943 			if (seg_type(lvseg, s) == AREA_LV && !fn(seg_lv(lvseg,s), data))
   1944 				return_0;
   1945 		}
   1946 	}
   1947 	return 1;
   1948 }
   1949 
   1950 static int _lv_postorder_cleanup(struct logical_volume *lv, void *data)
   1951 {
   1952 	if (!(lv->status & POSTORDER_FLAG))
   1953 		return 1;
   1954 	lv->status &= ~POSTORDER_FLAG;
   1955 
   1956 	if (!_lv_each_dependency(lv, _lv_postorder_cleanup, data))
   1957 		return_0;
   1958 	return 1;
   1959 }
   1960 
   1961 static int _lv_postorder_visit(struct logical_volume *lv,
   1962 			       int (*fn)(struct logical_volume *lv, void *data),
   1963 			       void *data)
   1964 {
   1965 	struct _lv_postorder_baton baton;
   1966 	int r;
   1967 
   1968 	if (lv->status & POSTORDER_FLAG)
   1969 		return 1;
   1970 
   1971 	baton.fn = fn;
   1972 	baton.data = data;
   1973 	r = _lv_each_dependency(lv, _lv_postorder_level, &baton);
   1974 	if (r)
   1975 		r = fn(lv, data);
   1976 
   1977 	return r;
   1978 }
   1979 
/*
 * Depth-first walk of the LV dependency graph rooted at lv, calling the
 * callback "fn" on each LV in postorder (dependencies before dependents).
 * "data" is handed unchanged to every call.  A callback that returns zero
 * signals an error and ends the walk; that result becomes the return
 * value of _lv_postorder.  The marks left by the walk are cleared again
 * before returning.
 */
static int _lv_postorder(struct logical_volume *lv,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data)
{
	const int result = _lv_postorder_visit(lv, fn, data);

	/* Runs whether or not the walk succeeded. */
	_lv_postorder_cleanup(lv, 0);

	return result;
}
   1996 
/*
 * Accumulator passed to _lv_mark_if_partial_collect() while inspecting
 * the dependencies of one LV.
 */
struct _lv_mark_if_partial_baton {
	int partial;	/* set to 1 once any dependency has PARTIAL_LV set */
};
   2000 
   2001 static int _lv_mark_if_partial_collect(struct logical_volume *lv, void *data)
   2002 {
   2003 	struct _lv_mark_if_partial_baton *baton = data;
   2004 	if (lv->status & PARTIAL_LV)
   2005 		baton->partial = 1;
   2006 
   2007 	return 1;
   2008 }
   2009 
   2010 static int _lv_mark_if_partial_single(struct logical_volume *lv, void *data)
   2011 {
   2012 	int s;
   2013 	struct _lv_mark_if_partial_baton baton;
   2014 	struct lv_segment *lvseg;
   2015 
   2016 	dm_list_iterate_items(lvseg, &lv->segments) {
   2017 		for (s = 0; s < lvseg->area_count; ++s) {
   2018 			if (seg_type(lvseg, s) == AREA_PV) {
   2019 				if (seg_pv(lvseg, s)->status & MISSING_PV)
   2020 					lv->status |= PARTIAL_LV;
   2021 			}
   2022 		}
   2023 	}
   2024 
   2025 	baton.partial = 0;
   2026 	_lv_each_dependency(lv, _lv_mark_if_partial_collect, &baton);
   2027 
   2028 	if (baton.partial)
   2029 		lv->status |= PARTIAL_LV;
   2030 
   2031 	return 1;
   2032 }
   2033 
   2034 static int _lv_mark_if_partial(struct logical_volume *lv)
   2035 {
   2036 	return _lv_postorder(lv, _lv_mark_if_partial_single, NULL);
   2037 }
   2038 
   2039 /*
   2040  * Mark LVs with missing PVs using PARTIAL_LV status flag. The flag is
   2041  * propagated transitively, so LVs referencing other LVs are marked
   2042  * partial as well, if any of their referenced LVs are marked partial.
   2043  */
   2044 static int _vg_mark_partial_lvs(struct volume_group *vg)
   2045 {
   2046 	struct logical_volume *lv;
   2047 	struct lv_list *lvl;
   2048 
   2049 	dm_list_iterate_items(lvl, &vg->lvs) {
   2050 		lv = lvl->lv;
   2051 		if (!_lv_mark_if_partial(lv))
   2052 			return_0;
   2053 	}
   2054 	return 1;
   2055 }
   2056 
   2057 /*
   2058  * Be sure that all PV devices have cached read ahead in dev-cache
   2059  * Currently it takes read_ahead from first PV segment only
   2060  */
   2061 static int _lv_read_ahead_single(struct logical_volume *lv, void *data)
   2062 {
   2063 	struct lv_segment *seg = first_seg(lv);
   2064 	uint32_t seg_read_ahead = 0, *read_ahead = data;
   2065 
   2066 	if (seg && seg->area_count && seg_type(seg, 0) == AREA_PV)
   2067 		dev_get_read_ahead(seg_pv(seg, 0)->dev, &seg_read_ahead);
   2068 
   2069 	if (seg_read_ahead > *read_ahead)
   2070 		*read_ahead = seg_read_ahead;
   2071 
   2072 	return 1;
   2073 }
   2074 
   2075 /*
   2076  * Calculate readahead for logical volume from underlying PV devices.
   2077  * If read_ahead is NULL, only ensure that readahead of PVs are preloaded
   2078  * into PV struct device in dev cache.
   2079  */
   2080 void lv_calculate_readahead(const struct logical_volume *lv, uint32_t *read_ahead)
   2081 {
   2082 	uint32_t _read_ahead = 0;
   2083 
   2084 	if (lv->read_ahead == DM_READ_AHEAD_AUTO)
   2085 		_lv_postorder((struct logical_volume *)lv, _lv_read_ahead_single, &_read_ahead);
   2086 
   2087 	if (read_ahead) {
   2088 		log_debug("Calculated readahead of LV %s is %u", lv->name, _read_ahead);
   2089 		*read_ahead = _read_ahead;
   2090 	}
   2091 }
   2092 
   2093 int vg_validate(struct volume_group *vg)
   2094 {
   2095 	struct pv_list *pvl, *pvl2;
   2096 	struct lv_list *lvl, *lvl2;
   2097 	char uuid[64] __attribute((aligned(8)));
   2098 	int r = 1;
   2099 	uint32_t hidden_lv_count = 0;
   2100 
   2101 	/* FIXME Also check there's no data/metadata overlap */
   2102 
   2103 	dm_list_iterate_items(pvl, &vg->pvs) {
   2104 		dm_list_iterate_items(pvl2, &vg->pvs) {
   2105 			if (pvl == pvl2)
   2106 				break;
   2107 			if (id_equal(&pvl->pv->id,
   2108 				     &pvl2->pv->id)) {
   2109 				if (!id_write_format(&pvl->pv->id, uuid,
   2110 						     sizeof(uuid)))
   2111 					 stack;
   2112 				log_error("Internal error: Duplicate PV id "
   2113 					  "%s detected for %s in %s.",
   2114 					  uuid, pv_dev_name(pvl->pv),
   2115 					  vg->name);
   2116 				r = 0;
   2117 			}
   2118 		}
   2119 
   2120 		if (strcmp(pvl->pv->vg_name, vg->name)) {
   2121 			log_error("Internal error: VG name for PV %s is corrupted",
   2122 				  pv_dev_name(pvl->pv));
   2123 			r = 0;
   2124 		}
   2125 	}
   2126 
   2127 	if (!check_pv_segments(vg)) {
   2128 		log_error("Internal error: PV segments corrupted in %s.",
   2129 			  vg->name);
   2130 		r = 0;
   2131 	}
   2132 
   2133 	/*
   2134 	 * Count all non-snapshot invisible LVs
   2135 	 */
   2136 	dm_list_iterate_items(lvl, &vg->lvs) {
   2137 		if (lvl->lv->status & VISIBLE_LV)
   2138 			continue;
   2139 
   2140 		/* snapshots */
   2141 		if (lv_is_cow(lvl->lv))
   2142 			continue;
   2143 
   2144 		/* virtual origins are always hidden */
   2145 		if (lv_is_origin(lvl->lv) && !lv_is_virtual_origin(lvl->lv))
   2146 			continue;
   2147 
   2148 		/* count other non-snapshot invisible volumes */
   2149 		hidden_lv_count++;
   2150 
   2151 		/*
   2152 		 *  FIXME: add check for unreferenced invisible LVs
   2153 		 *   - snapshot cow & origin
   2154 		 *   - mirror log & images
   2155 		 *   - mirror conversion volumes (_mimagetmp*)
   2156 		 */
   2157 	}
   2158 
   2159 	/*
   2160 	 * all volumes = visible LVs + snapshot_cows + invisible LVs
   2161 	 */
   2162 	if (((uint32_t) dm_list_size(&vg->lvs)) !=
   2163 	    vg_visible_lvs(vg) + snapshot_count(vg) + hidden_lv_count) {
   2164 		log_error("Internal error: #internal LVs (%u) != #LVs (%"
   2165 			  PRIu32 ") + #snapshots (%" PRIu32 ") + #internal LVs %u in VG %s",
   2166 			  dm_list_size(&vg->lvs), vg_visible_lvs(vg),
   2167 			  snapshot_count(vg), hidden_lv_count, vg->name);
   2168 		r = 0;
   2169 	}
   2170 
   2171 	dm_list_iterate_items(lvl, &vg->lvs) {
   2172 		dm_list_iterate_items(lvl2, &vg->lvs) {
   2173 			if (lvl == lvl2)
   2174 				break;
   2175 			if (!strcmp(lvl->lv->name, lvl2->lv->name)) {
   2176 				log_error("Internal error: Duplicate LV name "
   2177 					  "%s detected in %s.", lvl->lv->name,
   2178 					  vg->name);
   2179 				r = 0;
   2180 			}
   2181 			if (id_equal(&lvl->lv->lvid.id[1],
   2182 				     &lvl2->lv->lvid.id[1])) {
   2183 				if (!id_write_format(&lvl->lv->lvid.id[1], uuid,
   2184 						     sizeof(uuid)))
   2185 					 stack;
   2186 				log_error("Internal error: Duplicate LV id "
   2187 					  "%s detected for %s and %s in %s.",
   2188 					  uuid, lvl->lv->name, lvl2->lv->name,
   2189 					  vg->name);
   2190 				r = 0;
   2191 			}
   2192 		}
   2193 	}
   2194 
   2195 	dm_list_iterate_items(lvl, &vg->lvs) {
   2196 		if (!check_lv_segments(lvl->lv, 1)) {
   2197 			log_error("Internal error: LV segments corrupted in %s.",
   2198 				  lvl->lv->name);
   2199 			r = 0;
   2200 		}
   2201 	}
   2202 
   2203 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS) &&
   2204 	    (!vg->max_lv || !vg->max_pv)) {
   2205 		log_error("Internal error: Volume group %s has limited PV/LV count"
   2206 			  " but limit is not set.", vg->name);
   2207 		r = 0;
   2208 	}
   2209 
   2210 	if (vg_max_lv_reached(vg))
   2211 		stack;
   2212 
   2213 	return r;
   2214 }
   2215 
   2216 /*
   2217  * After vg_write() returns success,
   2218  * caller MUST call either vg_commit() or vg_revert()
   2219  */
   2220 int vg_write(struct volume_group *vg)
   2221 {
   2222 	struct dm_list *mdah;
   2223 	struct metadata_area *mda;
   2224 
   2225 	if (!vg_validate(vg))
   2226 		return_0;
   2227 
   2228 	if (vg->status & PARTIAL_VG) {
   2229 		log_error("Cannot update partial volume group %s.", vg->name);
   2230 		return 0;
   2231 	}
   2232 
   2233 	if (vg_missing_pv_count(vg) && !vg->cmd->handles_missing_pvs) {
   2234 		log_error("Cannot update volume group %s while physical "
   2235 			  "volumes are missing.", vg->name);
   2236 		return 0;
   2237 	}
   2238 
   2239 	if (vg_has_unknown_segments(vg) && !vg->cmd->handles_unknown_segments) {
   2240 		log_error("Cannot update volume group %s with unknown segments in it!",
   2241 			  vg->name);
   2242 		return 0;
   2243 	}
   2244 
   2245 
   2246 	if (dm_list_empty(&vg->fid->metadata_areas)) {
   2247 		log_error("Aborting vg_write: No metadata areas to write to!");
   2248 		return 0;
   2249 	}
   2250 
   2251 	if (!drop_cached_metadata(vg)) {
   2252 		log_error("Unable to drop cached metadata for VG %s.", vg->name);
   2253 		return 0;
   2254 	}
   2255 
   2256 	vg->seqno++;
   2257 
   2258 	/* Write to each copy of the metadata area */
   2259 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
   2260 		if (!mda->ops->vg_write) {
   2261 			log_error("Format does not support writing volume"
   2262 				  "group metadata areas");
   2263 			/* Revert */
   2264 			dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
   2265 				mda = dm_list_item(mdah, struct metadata_area);
   2266 
   2267 				if (mda->ops->vg_revert &&
   2268 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
   2269 					stack;
   2270 				}
   2271 			}
   2272 			return 0;
   2273 		}
   2274 		if (!mda->ops->vg_write(vg->fid, vg, mda)) {
   2275 			stack;
   2276 			/* Revert */
   2277 			dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
   2278 				mda = dm_list_item(mdah, struct metadata_area);
   2279 
   2280 				if (mda->ops->vg_revert &&
   2281 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
   2282 					stack;
   2283 				}
   2284 			}
   2285 			return 0;
   2286 		}
   2287 	}
   2288 
   2289 	/* Now pre-commit each copy of the new metadata */
   2290 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
   2291 		if (mda->ops->vg_precommit &&
   2292 		    !mda->ops->vg_precommit(vg->fid, vg, mda)) {
   2293 			stack;
   2294 			/* Revert */
   2295 			dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
   2296 				if (mda->ops->vg_revert &&
   2297 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
   2298 					stack;
   2299 				}
   2300 			}
   2301 			return 0;
   2302 		}
   2303 	}
   2304 
   2305 	return 1;
   2306 }
   2307 
   2308 /* Commit pending changes */
   2309 int vg_commit(struct volume_group *vg)
   2310 {
   2311 	struct metadata_area *mda;
   2312 	int cache_updated = 0;
   2313 	int failed = 0;
   2314 
   2315 	if (!vgname_is_locked(vg->name)) {
   2316 		log_error("Internal error: Attempt to write new VG metadata "
   2317 			  "without locking %s", vg->name);
   2318 		return cache_updated;
   2319 	}
   2320 
   2321 	/* Commit to each copy of the metadata area */
   2322 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
   2323 		failed = 0;
   2324 		if (mda->ops->vg_commit &&
   2325 		    !mda->ops->vg_commit(vg->fid, vg, mda)) {
   2326 			stack;
   2327 			failed = 1;
   2328 		}
   2329 		/* Update cache first time we succeed */
   2330 		if (!failed && !cache_updated) {
   2331 			lvmcache_update_vg(vg, 0);
   2332 			cache_updated = 1;
   2333 		}
   2334 	}
   2335 
   2336 	/* If update failed, remove any cached precommitted metadata. */
   2337 	if (!cache_updated && !drop_cached_metadata(vg))
   2338 		log_error("Attempt to drop cached metadata failed "
   2339 			  "after commit for VG %s.", vg->name);
   2340 
   2341 	/* If at least one mda commit succeeded, it was committed */
   2342 	return cache_updated;
   2343 }
   2344 
   2345 /* Don't commit any pending changes */
   2346 int vg_revert(struct volume_group *vg)
   2347 {
   2348 	struct metadata_area *mda;
   2349 
   2350 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
   2351 		if (mda->ops->vg_revert &&
   2352 		    !mda->ops->vg_revert(vg->fid, vg, mda)) {
   2353 			stack;
   2354 		}
   2355 	}
   2356 
   2357 	if (!drop_cached_metadata(vg))
   2358 		log_error("Attempt to drop cached metadata failed "
   2359 			  "after reverted update for VG %s.", vg->name);
   2360 
   2361 	return 1;
   2362 }
   2363 
   2364 /* Make orphan PVs look like a VG */
   2365 static struct volume_group *_vg_read_orphans(struct cmd_context *cmd,
   2366 					     const char *orphan_vgname)
   2367 {
   2368 	struct lvmcache_vginfo *vginfo;
   2369 	struct lvmcache_info *info;
   2370 	struct pv_list *pvl;
   2371 	struct volume_group *vg;
   2372 	struct physical_volume *pv;
   2373 	struct dm_pool *mem;
   2374 
   2375 	lvmcache_label_scan(cmd, 0);
   2376 
   2377 	if (!(vginfo = vginfo_from_vgname(orphan_vgname, NULL)))
   2378 		return_NULL;
   2379 
   2380 	if (!(mem = dm_pool_create("vg_read orphan", VG_MEMPOOL_CHUNK)))
   2381 		return_NULL;
   2382 
   2383 	if (!(vg = dm_pool_zalloc(mem, sizeof(*vg)))) {
   2384 		log_error("vg allocation failed");
   2385 		return NULL;
   2386 	}
   2387 	dm_list_init(&vg->pvs);
   2388 	dm_list_init(&vg->lvs);
   2389 	dm_list_init(&vg->tags);
   2390 	dm_list_init(&vg->removed_pvs);
   2391 	vg->vgmem = mem;
   2392 	vg->cmd = cmd;
   2393 	if (!(vg->name = dm_pool_strdup(mem, orphan_vgname))) {
   2394 		log_error("vg name allocation failed");
   2395 		goto bad;
   2396 	}
   2397 
   2398 	/* create format instance with appropriate metadata area */
   2399 	if (!(vg->fid = vginfo->fmt->ops->create_instance(vginfo->fmt,
   2400 							  orphan_vgname, NULL,
   2401 							  NULL))) {
   2402 		log_error("Failed to create format instance");
   2403 		goto bad;
   2404 	}
   2405 
   2406 	dm_list_iterate_items(info, &vginfo->infos) {
   2407 		if (!(pv = _pv_read(cmd, mem, dev_name(info->dev), NULL, NULL, 1, 0))) {
   2408 			continue;
   2409 		}
   2410 		if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
   2411 			log_error("pv_list allocation failed");
   2412 			goto bad;
   2413 		}
   2414 		pvl->pv = pv;
   2415 		dm_list_add(&vg->pvs, &pvl->list);
   2416 		vg->pv_count++;
   2417 	}
   2418 
   2419 	return vg;
   2420 bad:
   2421 	dm_pool_destroy(mem);
   2422 	return NULL;
   2423 }
   2424 
   2425 static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg)
   2426 {
   2427 	struct pv_list *pvl, *pvl2;
   2428 
   2429 	dm_list_iterate_items(pvl, &vg->pvs) {
   2430 		dm_list_iterate_items(pvl2, all_pvs) {
   2431 			if (pvl->pv->dev == pvl2->pv->dev)
   2432 				goto next_pv;
   2433 		}
   2434 
   2435 		/*
   2436 		 * PV is not on list so add it.
   2437 		 */
   2438 		if (!(pvl2 = _copy_pvl(pvmem, pvl))) {
   2439 			log_error("pv_list allocation for '%s' failed",
   2440 				  pv_dev_name(pvl->pv));
   2441 			return 0;
   2442 		}
   2443 		dm_list_add(all_pvs, &pvl2->list);
   2444   next_pv:
   2445 		;
   2446 	}
   2447 
   2448 	return 1;
   2449 }
   2450 
   2451 int vg_missing_pv_count(const struct volume_group *vg)
   2452 {
   2453 	int ret = 0;
   2454 	struct pv_list *pvl;
   2455 	dm_list_iterate_items(pvl, &vg->pvs) {
   2456 		if (pvl->pv->status & MISSING_PV)
   2457 			++ ret;
   2458 	}
   2459 	return ret;
   2460 }
   2461 
   2462 /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
   2463  * inconsistent metadata on disk (i.e. the VG write lock is held).
   2464  * This guarantees only consistent metadata is returned.
   2465  * If consistent is 0, caller must check whether consistent == 1 on return
   2466  * and take appropriate action if it isn't (e.g. abort; get write lock
   2467  * and call vg_read_internal again).
   2468  *
   2469  * If precommitted is set, use precommitted metadata if present.
   2470  *
   2471  * Either of vgname or vgid may be NULL.
   2472  */
   2473 static struct volume_group *_vg_read(struct cmd_context *cmd,
   2474 				     const char *vgname,
   2475 				     const char *vgid,
   2476 				     int *consistent, unsigned precommitted)
   2477 {
   2478 	struct format_instance *fid;
   2479 	const struct format_type *fmt;
   2480 	struct volume_group *vg, *correct_vg = NULL;
   2481 	struct metadata_area *mda;
   2482 	struct lvmcache_info *info;
   2483 	int inconsistent = 0;
   2484 	int inconsistent_vgid = 0;
   2485 	int inconsistent_pvs = 0;
   2486 	unsigned use_precommitted = precommitted;
   2487 	unsigned saved_handles_missing_pvs = cmd->handles_missing_pvs;
   2488 	struct dm_list *pvids;
   2489 	struct pv_list *pvl, *pvl2;
   2490 	struct dm_list all_pvs;
   2491 	char uuid[64] __attribute((aligned(8)));
   2492 
   2493 	if (is_orphan_vg(vgname)) {
   2494 		if (use_precommitted) {
   2495 			log_error("Internal error: vg_read_internal requires vgname "
   2496 				  "with pre-commit.");
   2497 			return NULL;
   2498 		}
   2499 		*consistent = 1;
   2500 		return _vg_read_orphans(cmd, vgname);
   2501 	}
   2502 
   2503 	if ((correct_vg = lvmcache_get_vg(vgid, precommitted))) {
   2504 		if (vg_missing_pv_count(correct_vg)) {
   2505 			log_verbose("There are %d physical volumes missing.",
   2506 				    vg_missing_pv_count(correct_vg));
   2507 			_vg_mark_partial_lvs(correct_vg);
   2508 		}
   2509 		*consistent = 1;
   2510 		return correct_vg;
   2511 	}
   2512 
   2513 	/* Find the vgname in the cache */
   2514 	/* If it's not there we must do full scan to be completely sure */
   2515 	if (!(fmt = fmt_from_vgname(vgname, vgid))) {
   2516 		lvmcache_label_scan(cmd, 0);
   2517 		if (!(fmt = fmt_from_vgname(vgname, vgid))) {
   2518 			if (memlock())
   2519 				return_NULL;
   2520 			lvmcache_label_scan(cmd, 2);
   2521 			if (!(fmt = fmt_from_vgname(vgname, vgid)))
   2522 				return_NULL;
   2523 		}
   2524 	}
   2525 
   2526 	/* Now determine the correct vgname if none was supplied */
   2527 	if (!vgname && !(vgname = vgname_from_vgid(cmd->mem, vgid)))
   2528 		return_NULL;
   2529 
   2530 	if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
   2531 		use_precommitted = 0;
   2532 
   2533 	/* create format instance with appropriate metadata area */
   2534 	if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
   2535 		log_error("Failed to create format instance");
   2536 		return NULL;
   2537 	}
   2538 
   2539 	/* Store pvids for later so we can check if any are missing */
   2540 	if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
   2541 		return_NULL;
   2542 
   2543 	/* Ensure contents of all metadata areas match - else do recovery */
   2544 	dm_list_iterate_items(mda, &fid->metadata_areas) {
   2545 		if ((use_precommitted &&
   2546 		     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
   2547 		    (!use_precommitted &&
   2548 		     !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
   2549 			inconsistent = 1;
   2550 			vg_release(vg);
   2551 			continue;
   2552 		}
   2553 		if (!correct_vg) {
   2554 			correct_vg = vg;
   2555 			continue;
   2556 		}
   2557 
   2558 		/* FIXME Also ensure contents same - checksum compare? */
   2559 		if (correct_vg->seqno != vg->seqno) {
   2560 			inconsistent = 1;
   2561 			if (vg->seqno > correct_vg->seqno) {
   2562 				vg_release(correct_vg);
   2563 				correct_vg = vg;
   2564 			}
   2565 		}
   2566 
   2567 		if (vg != correct_vg)
   2568 			vg_release(vg);
   2569 	}
   2570 
   2571 	/* Ensure every PV in the VG was in the cache */
   2572 	if (correct_vg) {
   2573 		/*
   2574 		 * If the VG has PVs without mdas, they may still be
   2575 		 * orphans in the cache: update the cache state here.
   2576 		 */
   2577 		if (!inconsistent &&
   2578 		    dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
   2579 			dm_list_iterate_items(pvl, &correct_vg->pvs) {
   2580 				if (!pvl->pv->dev) {
   2581 					inconsistent_pvs = 1;
   2582 					break;
   2583 				}
   2584 
   2585 				if (str_list_match_item(pvids, pvl->pv->dev->pvid))
   2586 					continue;
   2587 
   2588 				/*
   2589 				 * PV not marked as belonging to this VG in cache.
   2590 				 * Check it's an orphan without metadata area.
   2591 				 */
   2592 				if (!(info = info_from_pvid(pvl->pv->dev->pvid, 1)) ||
   2593 				   !info->vginfo || !is_orphan_vg(info->vginfo->vgname) ||
   2594 				   dm_list_size(&info->mdas)) {
   2595 					inconsistent_pvs = 1;
   2596 					break;
   2597 				}
   2598 			}
   2599 
   2600 			/* If the check passed, let's update VG and recalculate pvids */
   2601 			if (!inconsistent_pvs) {
   2602 				log_debug("Updating cache for PVs without mdas "
   2603 					  "in VG %s.", vgname);
   2604 				lvmcache_update_vg(correct_vg, use_precommitted);
   2605 
   2606 				if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
   2607 					return_NULL;
   2608 			}
   2609 		}
   2610 
   2611 		if (dm_list_size(&correct_vg->pvs) != dm_list_size(pvids)
   2612 		    + vg_missing_pv_count(correct_vg)) {
   2613 			log_debug("Cached VG %s had incorrect PV list",
   2614 				  vgname);
   2615 
   2616 			if (memlock())
   2617 				inconsistent = 1;
   2618 			else {
   2619 				vg_release(correct_vg);
   2620 				correct_vg = NULL;
   2621 			}
   2622 		} else dm_list_iterate_items(pvl, &correct_vg->pvs) {
   2623 			if (pvl->pv->status & MISSING_PV)
   2624 				continue;
   2625 			if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
   2626 				log_debug("Cached VG %s had incorrect PV list",
   2627 					  vgname);
   2628 				vg_release(correct_vg);
   2629 				correct_vg = NULL;
   2630 				break;
   2631 			}
   2632 		}
   2633 	}
   2634 
   2635 	dm_list_init(&all_pvs);
   2636 
   2637 	/* Failed to find VG where we expected it - full scan and retry */
   2638 	if (!correct_vg) {
   2639 		inconsistent = 0;
   2640 
   2641 		if (memlock())
   2642 			return_NULL;
   2643 		lvmcache_label_scan(cmd, 2);
   2644 		if (!(fmt = fmt_from_vgname(vgname, vgid)))
   2645 			return_NULL;
   2646 
   2647 		if (precommitted && !(fmt->features & FMT_PRECOMMIT))
   2648 			use_precommitted = 0;
   2649 
   2650 		/* create format instance with appropriate metadata area */
   2651 		if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
   2652 			log_error("Failed to create format instance");
   2653 			return NULL;
   2654 		}
   2655 
   2656 		/* Ensure contents of all metadata areas match - else recover */
   2657 		dm_list_iterate_items(mda, &fid->metadata_areas) {
   2658 			if ((use_precommitted &&
   2659 			     !(vg = mda->ops->vg_read_precommit(fid, vgname,
   2660 								mda))) ||
   2661 			    (!use_precommitted &&
   2662 			     !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
   2663 				inconsistent = 1;
   2664 				continue;
   2665 			}
   2666 			if (!correct_vg) {
   2667 				correct_vg = vg;
   2668 				if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
   2669 					vg_release(vg);
   2670 					return_NULL;
   2671 				}
   2672 				continue;
   2673 			}
   2674 
   2675 			if (strncmp((char *)vg->id.uuid,
   2676 			    (char *)correct_vg->id.uuid, ID_LEN)) {
   2677 				inconsistent = 1;
   2678 				inconsistent_vgid = 1;
   2679 			}
   2680 
   2681 			/* FIXME Also ensure contents same - checksums same? */
   2682 			if (correct_vg->seqno != vg->seqno) {
   2683 				inconsistent = 1;
   2684 				if (!_update_pv_list(cmd->mem, &all_pvs, vg)) {
   2685 					vg_release(vg);
   2686 					vg_release(correct_vg);
   2687 					return_NULL;
   2688 				}
   2689 				if (vg->seqno > correct_vg->seqno) {
   2690 					vg_release(correct_vg);
   2691 					correct_vg = vg;
   2692 				}
   2693 			}
   2694 
   2695 			if (vg != correct_vg)
   2696 				vg_release(vg);
   2697 		}
   2698 
   2699 		/* Give up looking */
   2700 		if (!correct_vg)
   2701 			return_NULL;
   2702 	}
   2703 
   2704 	lvmcache_update_vg(correct_vg, use_precommitted);
   2705 
   2706 	if (inconsistent) {
   2707 		/* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
   2708 		if (use_precommitted) {
   2709 			log_error("Inconsistent pre-commit metadata copies "
   2710 				  "for volume group %s", vgname);
   2711 			vg_release(correct_vg);
   2712 			return NULL;
   2713 		}
   2714 
   2715 		if (!*consistent)
   2716 			return correct_vg;
   2717 
   2718 		/* Don't touch if vgids didn't match */
   2719 		if (inconsistent_vgid) {
   2720 			log_error("Inconsistent metadata UUIDs found for "
   2721 				  "volume group %s", vgname);
   2722 			*consistent = 0;
   2723 			return correct_vg;
   2724 		}
   2725 
   2726 		log_warn("WARNING: Inconsistent metadata found for VG %s - updating "
   2727 			 "to use version %u", vgname, correct_vg->seqno);
   2728 
   2729 		cmd->handles_missing_pvs = 1;
   2730 		if (!vg_write(correct_vg)) {
   2731 			log_error("Automatic metadata correction failed");
   2732 			vg_release(correct_vg);
   2733 			cmd->handles_missing_pvs = saved_handles_missing_pvs;
   2734 			return NULL;
   2735 		}
   2736 		cmd->handles_missing_pvs = saved_handles_missing_pvs;
   2737 
   2738 		if (!vg_commit(correct_vg)) {
   2739 			log_error("Automatic metadata correction commit "
   2740 				  "failed");
   2741 			vg_release(correct_vg);
   2742 			return NULL;
   2743 		}
   2744 
   2745 		dm_list_iterate_items(pvl, &all_pvs) {
   2746 			dm_list_iterate_items(pvl2, &correct_vg->pvs) {
   2747 				if (pvl->pv->dev == pvl2->pv->dev)
   2748 					goto next_pv;
   2749 			}
   2750 			if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) {
   2751 				vg_release(correct_vg);
   2752 				return_NULL;
   2753 			}
   2754 			log_error("Removing PV %s (%s) that no longer belongs to VG %s",
   2755 				  pv_dev_name(pvl->pv), uuid, correct_vg->name);
   2756 			if (!pv_write_orphan(cmd, pvl->pv)) {
   2757 				vg_release(correct_vg);
   2758 				return_NULL;
   2759 			}
   2760       next_pv:
   2761 			;
   2762 		}
   2763 	}
   2764 
   2765 	if (vg_missing_pv_count(correct_vg)) {
   2766 		log_verbose("There are %d physical volumes missing.",
   2767 			    vg_missing_pv_count(correct_vg));
   2768 		_vg_mark_partial_lvs(correct_vg);
   2769 	}
   2770 
   2771 	if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
   2772 		log_error("WARNING: Interrupted pvmove detected in "
   2773 			  "volume group %s", correct_vg->name);
   2774 		log_error("Please restore the metadata by running "
   2775 			  "vgcfgrestore.");
   2776 		vg_release(correct_vg);
   2777 		return NULL;
   2778 	}
   2779 
   2780 	*consistent = 1;
   2781 	return correct_vg;
   2782 }
   2783 
   2784 struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname,
   2785 			     const char *vgid, int *consistent)
   2786 {
   2787 	struct volume_group *vg;
   2788 	struct lv_list *lvl;
   2789 
   2790 	if (!(vg = _vg_read(cmd, vgname, vgid, consistent, 0)))
   2791 		return NULL;
   2792 
   2793 	if (!check_pv_segments(vg)) {
   2794 		log_error("Internal error: PV segments corrupted in %s.",
   2795 			  vg->name);
   2796 		vg_release(vg);
   2797 		return NULL;
   2798 	}
   2799 
   2800 	dm_list_iterate_items(lvl, &vg->lvs) {
   2801 		if (!check_lv_segments(lvl->lv, 1)) {
   2802 			log_error("Internal error: LV segments corrupted in %s.",
   2803 				  lvl->lv->name);
   2804 			vg_release(vg);
   2805 			return NULL;
   2806 		}
   2807 	}
   2808 
   2809 	return vg;
   2810 }
   2811 
   2812 void vg_release(struct volume_group *vg)
   2813 {
   2814 	if (!vg || !vg->vgmem)
   2815 		return;
   2816 
   2817 	if (vg->cmd && vg->vgmem == vg->cmd->mem)
   2818 		log_error("Internal error: global memory pool used for VG %s",
   2819 			  vg->name);
   2820 
   2821 	dm_pool_destroy(vg->vgmem);
   2822 }
   2823 
   2824 /* This is only called by lv_from_lvid, which is only called from
   2825  * activate.c so we know the appropriate VG lock is already held and
   2826  * the vg_read_internal is therefore safe.
   2827  */
   2828 static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
   2829 					    const char *vgid,
   2830 					    unsigned precommitted)
   2831 {
   2832 	const char *vgname;
   2833 	struct dm_list *vgnames;
   2834 	struct volume_group *vg = NULL;
   2835 	struct lvmcache_vginfo *vginfo;
   2836 	struct str_list *strl;
   2837 	int consistent = 0;
   2838 
   2839 	/* Is corresponding vgname already cached? */
   2840 	if ((vginfo = vginfo_from_vgid(vgid)) &&
   2841 	    vginfo->vgname && !is_orphan_vg(vginfo->vgname)) {
   2842 		if ((vg = _vg_read(cmd, NULL, vgid,
   2843 				   &consistent, precommitted)) &&
   2844 		    !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
   2845 
   2846 			if (!consistent) {
   2847 				log_error("Volume group %s metadata is "
   2848 					  "inconsistent", vg->name);
   2849 			}
   2850 			return vg;
   2851 		}
   2852 		vg_release(vg);
   2853 	}
   2854 
   2855 	/* Mustn't scan if memory locked: ensure cache gets pre-populated! */
   2856 	if (memlock())
   2857 		goto out;
   2858 
   2859 	/* FIXME Need a genuine read by ID here - don't vg_read_internal by name! */
   2860 	/* FIXME Disabled vgrenames while active for now because we aren't
   2861 	 *       allowed to do a full scan here any more. */
   2862 
   2863 	// The slow way - full scan required to cope with vgrename
   2864 	if (!(vgnames = get_vgnames(cmd, 2))) {
   2865 		log_error("vg_read_by_vgid: get_vgnames failed");
   2866 		goto out;
   2867 	}
   2868 
   2869 	dm_list_iterate_items(strl, vgnames) {
   2870 		vgname = strl->str;
   2871 		if (!vgname || is_orphan_vg(vgname))
   2872 			continue;	// FIXME Unnecessary?
   2873 		consistent = 0;
   2874 		if ((vg = _vg_read(cmd, vgname, vgid, &consistent,
   2875 				   precommitted)) &&
   2876 		    !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
   2877 
   2878 			if (!consistent) {
   2879 				log_error("Volume group %s metadata is "
   2880 					  "inconsistent", vgname);
   2881 				goto out;
   2882 			}
   2883 			return vg;
   2884 		}
   2885 	}
   2886 
   2887 out:
   2888 	vg_release(vg);
   2889 	return NULL;
   2890 }
   2891 
   2892 /* Only called by activate.c */
   2893 struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s,
   2894 				    unsigned precommitted)
   2895 {
   2896 	struct lv_list *lvl;
   2897 	struct volume_group *vg;
   2898 	const union lvid *lvid;
   2899 
   2900 	lvid = (const union lvid *) lvid_s;
   2901 
   2902 	log_very_verbose("Finding volume group for uuid %s", lvid_s);
   2903 	if (!(vg = _vg_read_by_vgid(cmd, (char *)lvid->id[0].uuid, precommitted))) {
   2904 		log_error("Volume group for uuid not found: %s", lvid_s);
   2905 		return NULL;
   2906 	}
   2907 
   2908 	log_verbose("Found volume group \"%s\"", vg->name);
   2909 	if (vg->status & EXPORTED_VG) {
   2910 		log_error("Volume group \"%s\" is exported", vg->name);
   2911 		goto out;
   2912 	}
   2913 	if (!(lvl = find_lv_in_vg_by_lvid(vg, lvid))) {
   2914 		log_very_verbose("Can't find logical volume id %s", lvid_s);
   2915 		goto out;
   2916 	}
   2917 
   2918 	return lvl->lv;
   2919 out:
   2920 	vg_release(vg);
   2921 	return NULL;
   2922 }
   2923 
   2924 /**
   2925  * pv_read - read and return a handle to a physical volume
   2926  * @cmd: LVM command initiating the pv_read
   2927  * @pv_name: full device name of the PV, including the path
   2928  * @mdas: list of metadata areas of the PV
   2929  * @label_sector: sector number where the PV label is stored on @pv_name
   2930  * @warnings:
   2931  *
   2932  * Returns:
   2933  *   PV handle - valid pv_name and successful read of the PV, or
   2934  *   NULL - invalid parameter or error in reading the PV
   2935  *
   2936  * Note:
   2937  *   FIXME - liblvm todo - make into function that returns handle
   2938  */
   2939 struct physical_volume *pv_read(struct cmd_context *cmd, const char *pv_name,
   2940 				struct dm_list *mdas, uint64_t *label_sector,
   2941 				int warnings, int scan_label_only)
   2942 {
   2943 	return _pv_read(cmd, cmd->mem, pv_name, mdas, label_sector, warnings, scan_label_only);
   2944 }
   2945 
   2946 /* FIXME Use label functions instead of PV functions */
   2947 static struct physical_volume *_pv_read(struct cmd_context *cmd,
   2948 					struct dm_pool *pvmem,
   2949 					const char *pv_name,
   2950 					struct dm_list *mdas,
   2951 					uint64_t *label_sector,
   2952 					int warnings, int scan_label_only)
   2953 {
   2954 	struct physical_volume *pv;
   2955 	struct label *label;
   2956 	struct lvmcache_info *info;
   2957 	struct device *dev;
   2958 
   2959 	if (!(dev = dev_cache_get(pv_name, cmd->filter)))
   2960 		return_NULL;
   2961 
   2962 	if (!(label_read(dev, &label, UINT64_C(0)))) {
   2963 		if (warnings)
   2964 			log_error("No physical volume label read from %s",
   2965 				  pv_name);
   2966 		return NULL;
   2967 	}
   2968 
   2969 	info = (struct lvmcache_info *) label->info;
   2970 	if (label_sector && *label_sector)
   2971 		*label_sector = label->sector;
   2972 
   2973 	if (!(pv = dm_pool_zalloc(pvmem, sizeof(*pv)))) {
   2974 		log_error("pv allocation for '%s' failed", pv_name);
   2975 		return NULL;
   2976 	}
   2977 
   2978 	dm_list_init(&pv->tags);
   2979 	dm_list_init(&pv->segments);
   2980 
   2981 	/* FIXME Move more common code up here */
   2982 	if (!(info->fmt->ops->pv_read(info->fmt, pv_name, pv, mdas,
   2983 	      scan_label_only))) {
   2984 		log_error("Failed to read existing physical volume '%s'",
   2985 			  pv_name);
   2986 		return NULL;
   2987 	}
   2988 
   2989 	if (!pv->size)
   2990 		return NULL;
   2991 
   2992 	if (!alloc_pv_segment_whole_pv(pvmem, pv))
   2993 		return_NULL;
   2994 
   2995 	return pv;
   2996 }
   2997 
/*
 * Return the list produced by lvmcache_get_vgnames() for @cmd.
 * May return empty list.
 */
struct dm_list *get_vgnames(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *vgnames = lvmcache_get_vgnames(cmd, full_scan);

	return vgnames;
}
   3003 
/* Return the list produced by lvmcache_get_vgids() for @cmd. */
struct dm_list *get_vgids(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *vgids = lvmcache_get_vgids(cmd, full_scan);

	return vgids;
}
   3008 
   3009 static int _get_pvs(struct cmd_context *cmd, struct dm_list **pvslist)
   3010 {
   3011 	struct str_list *strl;
   3012 	struct dm_list * uninitialized_var(results);
   3013 	const char *vgname, *vgid;
   3014 	struct pv_list *pvl, *pvl_copy;
   3015 	struct dm_list *vgids;
   3016 	struct volume_group *vg;
   3017 	int consistent = 0;
   3018 	int old_pvmove;
   3019 
   3020 	lvmcache_label_scan(cmd, 0);
   3021 
   3022 	if (pvslist) {
   3023 		if (!(results = dm_pool_alloc(cmd->mem, sizeof(*results)))) {
   3024 			log_error("PV list allocation failed");
   3025 			return 0;
   3026 		}
   3027 
   3028 		dm_list_init(results);
   3029 	}
   3030 
   3031 	/* Get list of VGs */
   3032 	if (!(vgids = get_vgids(cmd, 0))) {
   3033 		log_error("get_pvs: get_vgids failed");
   3034 		return 0;
   3035 	}
   3036 
   3037 	/* Read every VG to ensure cache consistency */
   3038 	/* Orphan VG is last on list */
   3039 	old_pvmove = pvmove_mode();
   3040 	init_pvmove(1);
   3041 	dm_list_iterate_items(strl, vgids) {
   3042 		vgid = strl->str;
   3043 		if (!vgid)
   3044 			continue;	/* FIXME Unnecessary? */
   3045 		consistent = 0;
   3046 		if (!(vgname = vgname_from_vgid(NULL, vgid))) {
   3047 			stack;
   3048 			continue;
   3049 		}
   3050 		if (!(vg = vg_read_internal(cmd, vgname, vgid, &consistent))) {
   3051 			stack;
   3052 			continue;
   3053 		}
   3054 		if (!consistent)
   3055 			log_warn("WARNING: Volume Group %s is not consistent",
   3056 				 vgname);
   3057 
   3058 		/* Move PVs onto results list */
   3059 		if (pvslist)
   3060 			dm_list_iterate_items(pvl, &vg->pvs) {
   3061 				if (!(pvl_copy = _copy_pvl(cmd->mem, pvl))) {
   3062 					log_error("PV list allocation failed");
   3063 					vg_release(vg);
   3064 					return 0;
   3065 				}
   3066 				dm_list_add(results, &pvl_copy->list);
   3067 			}
   3068 		vg_release(vg);
   3069 	}
   3070 	init_pvmove(old_pvmove);
   3071 
   3072 	if (pvslist)
   3073 		*pvslist = results;
   3074 	else
   3075 		dm_pool_free(cmd->mem, vgids);
   3076 
   3077 	return 1;
   3078 }
   3079 
   3080 struct dm_list *get_pvs(struct cmd_context *cmd)
   3081 {
   3082 	struct dm_list *results;
   3083 
   3084 	if (!_get_pvs(cmd, &results))
   3085 		return NULL;
   3086 
   3087 	return results;
   3088 }
   3089 
   3090 int scan_vgs_for_pvs(struct cmd_context *cmd)
   3091 {
   3092 	return _get_pvs(cmd, NULL);
   3093 }
   3094 
   3095 int pv_write(struct cmd_context *cmd __attribute((unused)),
   3096 	     struct physical_volume *pv,
   3097 	     struct dm_list *mdas, int64_t label_sector)
   3098 {
   3099 	if (!pv->fmt->ops->pv_write) {
   3100 		log_error("Format does not support writing physical volumes");
   3101 		return 0;
   3102 	}
   3103 
   3104 	if (!is_orphan_vg(pv->vg_name) || pv->pe_alloc_count) {
   3105 		log_error("Assertion failed: can't _pv_write non-orphan PV "
   3106 			  "(in VG %s)", pv->vg_name);
   3107 		return 0;
   3108 	}
   3109 
   3110 	if (!pv->fmt->ops->pv_write(pv->fmt, pv, mdas, label_sector))
   3111 		return_0;
   3112 
   3113 	return 1;
   3114 }
   3115 
   3116 int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv)
   3117 {
   3118 	const char *old_vg_name = pv->vg_name;
   3119 
   3120 	pv->vg_name = cmd->fmt->orphan_vg_name;
   3121 	pv->status = ALLOCATABLE_PV;
   3122 	pv->pe_alloc_count = 0;
   3123 
   3124 	if (!dev_get_size(pv->dev, &pv->size)) {
   3125 		log_error("%s: Couldn't get size.", pv_dev_name(pv));
   3126 		return 0;
   3127 	}
   3128 
   3129 	if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
   3130 		log_error("Failed to clear metadata from physical "
   3131 			  "volume \"%s\" after removal from \"%s\"",
   3132 			  pv_dev_name(pv), old_vg_name);
   3133 		return 0;
   3134 	}
   3135 
   3136 	return 1;
   3137 }
   3138 
   3139 /**
   3140  * is_orphan_vg - Determine whether a vg_name is an orphan
   3141  * @vg_name: pointer to the vg_name
   3142  */
   3143 int is_orphan_vg(const char *vg_name)
   3144 {
   3145 	return (vg_name && vg_name[0] == ORPHAN_PREFIX[0]) ? 1 : 0;
   3146 }
   3147 
   3148 /**
   3149  * is_orphan - Determine whether a pv is an orphan based on its vg_name
   3150  * @pv: handle to the physical volume
   3151  */
   3152 int is_orphan(const struct physical_volume *pv)
   3153 {
   3154 	return is_orphan_vg(pv_field(pv, vg_name));
   3155 }
   3156 
   3157 /**
   3158  * is_pv - Determine whether a pv is a real pv or dummy one
   3159  * @pv: handle to device
   3160  */
   3161 int is_pv(struct physical_volume *pv)
   3162 {
   3163 	return (pv_field(pv, vg_name) ? 1 : 0);
   3164 }
   3165 
   3166 /*
   3167  * Returns:
   3168  *  0 - fail
   3169  *  1 - success
   3170  */
   3171 int pv_analyze(struct cmd_context *cmd, const char *pv_name,
   3172 	       uint64_t label_sector)
   3173 {
   3174 	struct label *label;
   3175 	struct device *dev;
   3176 	struct metadata_area *mda;
   3177 	struct lvmcache_info *info;
   3178 
   3179 	dev = dev_cache_get(pv_name, cmd->filter);
   3180 	if (!dev) {
   3181 		log_error("Device %s not found (or ignored by filtering).",
   3182 			  pv_name);
   3183 		return 0;
   3184 	}
   3185 
   3186 	/*
   3187 	 * First, scan for LVM labels.
   3188 	 */
   3189 	if (!label_read(dev, &label, label_sector)) {
   3190 		log_error("Could not find LVM label on %s",
   3191 			  pv_name);
   3192 		return 0;
   3193 	}
   3194 
   3195 	log_print("Found label on %s, sector %"PRIu64", type=%s",
   3196 		  pv_name, label->sector, label->type);
   3197 
   3198 	/*
   3199 	 * Next, loop through metadata areas
   3200 	 */
   3201 	info = label->info;
   3202 	dm_list_iterate_items(mda, &info->mdas)
   3203 		mda->ops->pv_analyze_mda(info->fmt, mda);
   3204 
   3205 	return 1;
   3206 }
   3207 
   3208 /* FIXME: remove / combine this with locking? */
   3209 int vg_check_write_mode(struct volume_group *vg)
   3210 {
   3211 	if (vg->open_mode != 'w') {
   3212 		log_errno(EPERM, "Attempt to modify a read-only VG");
   3213 		return 0;
   3214 	}
   3215 	return 1;
   3216 }
   3217 
   3218 /*
   3219  * Performs a set of checks against a VG according to bits set in status
   3220  * and returns FAILED_* bits for those that aren't acceptable.
   3221  *
   3222  * FIXME Remove the unnecessary duplicate definitions and return bits directly.
   3223  */
   3224 static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
   3225 				    uint32_t status)
   3226 {
   3227 	uint32_t failure = 0;
   3228 
   3229 	if ((status & CLUSTERED) &&
   3230 	    (vg_is_clustered(vg)) && !locking_is_clustered()) {
   3231 		log_error("Skipping clustered volume group %s", vg->name);
   3232 		/* Return because other flags are considered undefined. */
   3233 		return FAILED_CLUSTERED;
   3234 	}
   3235 
   3236 	if ((status & EXPORTED_VG) &&
   3237 	    vg_is_exported(vg)) {
   3238 		log_error("Volume group %s is exported", vg->name);
   3239 		failure |= FAILED_EXPORTED;
   3240 	}
   3241 
   3242 	if ((status & LVM_WRITE) &&
   3243 	    !(vg->status & LVM_WRITE)) {
   3244 		log_error("Volume group %s is read-only", vg->name);
   3245 		failure |= FAILED_READ_ONLY;
   3246 	}
   3247 
   3248 	if ((status & RESIZEABLE_VG) &&
   3249 	    !vg_is_resizeable(vg)) {
   3250 		log_error("Volume group %s is not resizeable.", vg->name);
   3251 		failure |= FAILED_RESIZEABLE;
   3252 	}
   3253 
   3254 	return failure;
   3255 }
   3256 
/**
 * vg_check_status - check volume group status flags and log error
 * @vg - volume group to check status flags
 * @status - specific status flags to check (e.g. EXPORTED_VG)
 *
 * Returns 1 if _vg_bad_status_bits() reports no failure, 0 otherwise.
 */
int vg_check_status(const struct volume_group *vg, uint32_t status)
{
	const uint32_t bad = _vg_bad_status_bits(vg, status);

	return bad == 0;
}
   3266 
   3267 static struct volume_group *_recover_vg(struct cmd_context *cmd, const char *lock_name,
   3268 			 const char *vg_name, const char *vgid,
   3269 			 uint32_t lock_flags)
   3270 {
   3271 	int consistent = 1;
   3272 	struct volume_group *vg;
   3273 
   3274 	lock_flags &= ~LCK_TYPE_MASK;
   3275 	lock_flags |= LCK_WRITE;
   3276 
   3277 	unlock_vg(cmd, lock_name);
   3278 
   3279 	dev_close_all();
   3280 
   3281 	if (!lock_vol(cmd, lock_name, lock_flags))
   3282 		return_NULL;
   3283 
   3284 	if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent)))
   3285 		return_NULL;
   3286 
   3287 	if (!consistent) {
   3288 		vg_release(vg);
   3289 		return_NULL;
   3290 	}
   3291 
   3292 	return (struct volume_group *)vg;
   3293 }
   3294 
   3295 /*
   3296  * Consolidated locking, reading, and status flag checking.
   3297  *
   3298  * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in
   3299  * misc_flags will return it with FAILED_INCONSISTENT set instead of
   3300  * giving you nothing.
   3301  *
   3302  * Use vg_read_error(vg) to determine the result.  Nonzero means there were
   3303  * problems reading the volume group.
   3304  * Zero value means that the VG is open and appropriate locks are held.
   3305  */
   3306 static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
   3307 			       const char *vgid, uint32_t lock_flags,
   3308 			       uint32_t status_flags, uint32_t misc_flags)
   3309 {
   3310 	struct volume_group *vg = NULL;
   3311 	const char *lock_name;
   3312  	int consistent = 1;
   3313 	int consistent_in;
   3314 	uint32_t failure = 0;
   3315 	int already_locked;
   3316 
   3317 	if (misc_flags & READ_ALLOW_INCONSISTENT || !(lock_flags & LCK_WRITE))
   3318 		consistent = 0;
   3319 
   3320 	if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) {
   3321 		log_error("Volume group name %s has invalid characters",
   3322 			  vg_name);
   3323 		return NULL;
   3324 	}
   3325 
   3326 	lock_name = is_orphan_vg(vg_name) ? VG_ORPHANS : vg_name;
   3327 	already_locked = vgname_is_locked(lock_name);
   3328 
   3329 	if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK) &&
   3330 	    !lock_vol(cmd, lock_name, lock_flags)) {
   3331 		log_error("Can't get lock for %s", vg_name);
   3332 		return _vg_make_handle(cmd, vg, FAILED_LOCKING);
   3333 	}
   3334 
   3335 	if (is_orphan_vg(vg_name))
   3336 		status_flags &= ~LVM_WRITE;
   3337 
   3338 	consistent_in = consistent;
   3339 
   3340 	/* If consistent == 1, we get NULL here if correction fails. */
   3341 	if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent))) {
   3342 		if (consistent_in && !consistent) {
   3343 			log_error("Volume group \"%s\" inconsistent.", vg_name);
   3344 			failure |= FAILED_INCONSISTENT;
   3345 			goto_bad;
   3346 		}
   3347 
   3348 		log_error("Volume group \"%s\" not found", vg_name);
   3349 
   3350 		failure |= FAILED_NOTFOUND;
   3351 		goto_bad;
   3352 	}
   3353 
   3354 	if (vg_is_clustered(vg) && !locking_is_clustered()) {
   3355 		log_error("Skipping clustered volume group %s", vg->name);
   3356 		failure |= FAILED_CLUSTERED;
   3357 		goto_bad;
   3358 	}
   3359 
   3360 	/* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */
   3361 	if (!consistent && !failure) {
   3362 		vg_release(vg);
   3363 		if (!(vg = _recover_vg(cmd, lock_name, vg_name, vgid, lock_flags))) {
   3364 			log_error("Recovery of volume group \"%s\" failed.",
   3365 				  vg_name);
   3366 			failure |= FAILED_INCONSISTENT;
   3367 			goto_bad;
   3368 		}
   3369 	}
   3370 
   3371 	/*
   3372 	 * Check that the tool can handle tricky cases -- missing PVs and
   3373 	 * unknown segment types.
   3374 	 */
   3375 
   3376 	if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) &&
   3377 	    (lock_flags & LCK_WRITE)) {
   3378 		log_error("Cannot change VG %s while PVs are missing.", vg->name);
   3379 		log_error("Consider vgreduce --removemissing.");
   3380 		failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
   3381 		goto_bad;
   3382 	}
   3383 
   3384 	if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) &&
   3385 	    (lock_flags & LCK_WRITE)) {
   3386 		log_error("Cannot change VG %s with unknown segments in it!",
   3387 			  vg->name);
   3388 		failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
   3389 		goto_bad;
   3390 	}
   3391 
   3392 	failure |= _vg_bad_status_bits(vg, status_flags);
   3393 	if (failure)
   3394 		goto_bad;
   3395 
   3396 	return _vg_make_handle(cmd, vg, failure);
   3397 
   3398 bad:
   3399 	if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK))
   3400 		unlock_vg(cmd, lock_name);
   3401 
   3402 	return _vg_make_handle(cmd, vg, failure);
   3403 }
   3404 
   3405 /*
   3406  * vg_read: High-level volume group metadata read function.
   3407  *
   3408  * vg_read_error() must be used on any handle returned to check for errors.
   3409  *
   3410  *  - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT
   3411  *  - VG is read-only: FAILED_READ_ONLY
   3412  *  - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED
   3413  *  - VG is not RESIZEABLE: FAILED_RESIZEABLE
   3414  *  - locking failed: FAILED_LOCKING
   3415  *
   3416  * On failures, all locks are released, unless one of the following applies:
   3417  *  - vgname_is_locked(lock_name) is true
   3418  * FIXME: remove the above 2 conditions if possible and make an error always
   3419  * release the lock.
   3420  *
   3421  * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE.
   3422  *
   3423  * Checking for VG existence:
   3424  *
   3425  * FIXME: We want vg_read to attempt automatic recovery after acquiring a
   3426  * temporary write lock: if that fails, we bail out as usual, with failed &
   3427  * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in
   3428  * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with
   3429  * *consistent = 1.
   3430  */
   3431 struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
   3432 	      const char *vgid, uint32_t flags)
   3433 {
   3434 	uint32_t status = 0;
   3435 	uint32_t lock_flags = LCK_VG_READ;
   3436 
   3437 	if (flags & READ_FOR_UPDATE) {
   3438 		status |= EXPORTED_VG | LVM_WRITE;
   3439 		lock_flags = LCK_VG_WRITE;
   3440 	}
   3441 
   3442 	if (flags & READ_ALLOW_EXPORTED)
   3443 		status &= ~EXPORTED_VG;
   3444 
   3445 	return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags);
   3446 }
   3447 
   3448 /*
   3449  * A high-level volume group metadata reading function. Open a volume group for
   3450  * later update (this means the user code can change the metadata and later
   3451  * request the new metadata to be written and committed).
   3452  */
   3453 struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
   3454 			 const char *vgid, uint32_t flags)
   3455 {
   3456 	return vg_read(cmd, vg_name, vgid, flags | READ_FOR_UPDATE);
   3457 }
   3458 
   3459 /*
   3460  * Test the validity of a VG handle returned by vg_read() or vg_read_for_update().
   3461  */
   3462 uint32_t vg_read_error(struct volume_group *vg_handle)
   3463 {
   3464 	if (!vg_handle)
   3465 		return FAILED_ALLOCATION;
   3466 
   3467 	return vg_handle->read_status;
   3468 }
   3469 
   3470 /*
   3471  * Lock a vgname and/or check for existence.
   3472  * Takes a WRITE lock on the vgname before scanning.
   3473  * If scanning fails or vgname found, release the lock.
   3474  * NOTE: If you find the return codes confusing, you might think of this
   3475  * function as similar to an open() call with O_CREAT and O_EXCL flags
   3476  * (open returns fail with -EEXIST if file already exists).
   3477  *
   3478  * Returns:
   3479  * FAILED_LOCKING - Cannot lock name
   3480  * FAILED_EXIST - VG name already exists - cannot reserve
   3481  * SUCCESS - VG name does not exist in system and WRITE lock held
   3482  */
   3483 uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname)
   3484 {
   3485 	if (!lock_vol(cmd, vgname, LCK_VG_WRITE)) {
   3486 		return FAILED_LOCKING;
   3487 	}
   3488 
   3489 	/* Find the vgname in the cache */
   3490 	/* If it's not there we must do full scan to be completely sure */
   3491 	if (!fmt_from_vgname(vgname, NULL)) {
   3492 		lvmcache_label_scan(cmd, 0);
   3493 		if (!fmt_from_vgname(vgname, NULL)) {
   3494 			if (memlock()) {
   3495 				/*
   3496 				 * FIXME: Disallow calling this function if
   3497 				 * memlock() is true.
   3498 				 */
   3499 				unlock_vg(cmd, vgname);
   3500 				return FAILED_LOCKING;
   3501 			}
   3502 			lvmcache_label_scan(cmd, 2);
   3503 			if (!fmt_from_vgname(vgname, NULL)) {
   3504 				/* vgname not found after scanning */
   3505 				return SUCCESS;
   3506 			}
   3507 		}
   3508 	}
   3509 
   3510 	/* Found vgname so cannot reserve. */
   3511 	unlock_vg(cmd, vgname);
   3512 	return FAILED_EXIST;
   3513 }
   3514 
   3515 /*
   3516  * Gets/Sets for external LVM library
   3517  */
   3518 struct id pv_id(const struct physical_volume *pv)
   3519 {
   3520 	return pv_field(pv, id);
   3521 }
   3522 
   3523 const struct format_type *pv_format_type(const struct physical_volume *pv)
   3524 {
   3525 	return pv_field(pv, fmt);
   3526 }
   3527 
   3528 struct id pv_vgid(const struct physical_volume *pv)
   3529 {
   3530 	return pv_field(pv, vgid);
   3531 }
   3532 
   3533 struct device *pv_dev(const struct physical_volume *pv)
   3534 {
   3535 	return pv_field(pv, dev);
   3536 }
   3537 
   3538 const char *pv_vg_name(const struct physical_volume *pv)
   3539 {
   3540 	return pv_field(pv, vg_name);
   3541 }
   3542 
/* Return dev_name() of the device returned by pv_dev() for this PV. */
const char *pv_dev_name(const struct physical_volume *pv)
{
	struct device *device = pv_dev(pv);

	return dev_name(device);
}
   3547 
/* Return the PV's size field. */
uint64_t pv_size(const struct physical_volume *pv)
{
	const uint64_t size = pv_field(pv, size);

	return size;
}
   3552 
/* Return the PV's status field. */
uint32_t pv_status(const struct physical_volume *pv)
{
	const uint32_t status = pv_field(pv, status);

	return status;
}
   3557 
/* Return the PV's pe_size field. */
uint32_t pv_pe_size(const struct physical_volume *pv)
{
	const uint32_t pe_size = pv_field(pv, pe_size);

	return pe_size;
}
   3562 
/* Return the PV's pe_start field. */
uint64_t pv_pe_start(const struct physical_volume *pv)
{
	const uint64_t pe_start = pv_field(pv, pe_start);

	return pe_start;
}
   3567 
/* Return the PV's pe_count field. */
uint32_t pv_pe_count(const struct physical_volume *pv)
{
	const uint32_t pe_count = pv_field(pv, pe_count);

	return pe_count;
}
   3572 
   3573 uint32_t pv_pe_alloc_count(const struct physical_volume *pv)
   3574 {
   3575 	return pv_field(pv, pe_alloc_count);
   3576 }
   3577 
   3578 uint32_t pv_mda_count(const struct physical_volume *pv)
   3579 {
   3580 	struct lvmcache_info *info;
   3581 
   3582 	info = info_from_pvid((const char *)&pv->id.uuid, 0);
   3583 	return info ? dm_list_size(&info->mdas) : UINT64_C(0);
   3584 }
   3585 
   3586 uint32_t vg_seqno(const struct volume_group *vg)
   3587 {
   3588 	return vg->seqno;
   3589 }
   3590 
   3591 uint32_t vg_status(const struct volume_group *vg)
   3592 {
   3593 	return vg->status;
   3594 }
   3595 
   3596 uint64_t vg_size(const struct volume_group *vg)
   3597 {
   3598 	return (uint64_t) vg->extent_count * vg->extent_size;
   3599 }
   3600 
   3601 uint64_t vg_free(const struct volume_group *vg)
   3602 {
   3603 	return (uint64_t) vg->free_count * vg->extent_size;
   3604 }
   3605 
   3606 uint64_t vg_extent_size(const struct volume_group *vg)
   3607 {
   3608 	return (uint64_t) vg->extent_size;
   3609 }
   3610 
   3611 uint64_t vg_extent_count(const struct volume_group *vg)
   3612 {
   3613 	return (uint64_t) vg->extent_count;
   3614 }
   3615 
   3616 uint64_t vg_free_count(const struct volume_group *vg)
   3617 {
   3618 	return (uint64_t) vg->free_count;
   3619 }
   3620 
   3621 uint64_t vg_pv_count(const struct volume_group *vg)
   3622 {
   3623 	return (uint64_t) vg->pv_count;
   3624 }
   3625 
   3626 uint64_t vg_max_pv(const struct volume_group *vg)
   3627 {
   3628 	return (uint64_t) vg->max_pv;
   3629 }
   3630 
   3631 uint64_t vg_max_lv(const struct volume_group *vg)
   3632 {
   3633 	return (uint64_t) vg->max_lv;
   3634 }
   3635 
   3636 uint32_t vg_mda_count(const struct volume_group *vg)
   3637 {
   3638 	return dm_list_size(&vg->fid->metadata_areas);
   3639 }
   3640 
   3641 uint64_t lv_size(const struct logical_volume *lv)
   3642 {
   3643 	return lv->size;
   3644 }
   3645 
   3646 /**
   3647  * pv_by_path - Given a device path return a PV handle if it is a PV
   3648  * @cmd - handle to the LVM command instance
   3649  * @pv_name - device path to read for the PV
   3650  *
   3651  * Returns:
   3652  *  NULL - device path does not contain a valid PV
   3653  *  non-NULL - PV handle corresponding to device path
   3654  *
   3655  * FIXME: merge with find_pv_by_name ?
   3656  */
   3657 struct physical_volume *pv_by_path(struct cmd_context *cmd, const char *pv_name)
   3658 {
   3659 	struct dm_list mdas;
   3660 
   3661 	dm_list_init(&mdas);
   3662 	return _pv_read(cmd, cmd->mem, pv_name, &mdas, NULL, 1, 0);
   3663 }
   3664