Home | History | Annotate | Line # | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
     23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
     24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
     25  * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
     26  * Copyright (c) 2014 Integros [integros.com]
     27  */
     28 
     29 #include <sys/zfs_context.h>
     30 #include <sys/dsl_userhold.h>
     31 #include <sys/dsl_dataset.h>
     32 #include <sys/dsl_synctask.h>
     33 #include <sys/dmu_tx.h>
     34 #include <sys/dsl_pool.h>
     35 #include <sys/dsl_dir.h>
     36 #include <sys/dmu_traverse.h>
     37 #include <sys/dsl_scan.h>
     38 #include <sys/dmu_objset.h>
     39 #include <sys/zap.h>
     40 #include <sys/zfeature.h>
     41 #include <sys/zfs_ioctl.h>
     42 #include <sys/dsl_deleg.h>
     43 #include <sys/dmu_impl.h>
     44 
/*
 * Argument bundle passed through the dsl_sync_task() framework from
 * dsl_destroy_snapshots_nvl() to the check/sync callbacks below.
 */
typedef struct dmu_snapshots_destroy_arg {
	/* in: caller-supplied list of snapshot names to destroy */
	nvlist_t *dsda_snaps;
	/* out: subset of dsda_snaps that passed the check callback */
	nvlist_t *dsda_successful_snaps;
	/* in: B_TRUE to mark busy snapshots for deferred destruction */
	boolean_t dsda_defer;
	/* out: name -> errno for each snapshot that failed the check */
	nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;
     51 
/*
 * Check whether the snapshot "ds" can be destroyed (in syncing or open
 * context).  Returns 0 if destruction may proceed, otherwise:
 *
 *   EINVAL  - "ds" is not a snapshot
 *   EBUSY   - the snapshot is long-held, or (when !defer) has user holds
 *   ENOTSUP - deferred destroy requested on a pool older than
 *             SPA_VERSION_USERREFS
 *   EEXIST  - (when !defer) the snapshot is a branch point, i.e. clones
 *             still depend on it
 *
 * With defer == B_TRUE, user holds and clones are not errors: the sync
 * task will simply flag the snapshot for deferred destruction.
 */
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (dsl_dataset_long_held(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (SET_ERROR(ENOTSUP));
		return (0);
	}

	/*
	 * If this snapshot has an elevated user reference count,
	 * we can't destroy it yet.
	 */
	if (ds->ds_userrefs > 0)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete a branch point.
	 */
	if (dsl_dataset_phys(ds)->ds_num_children > 1)
		return (SET_ERROR(EEXIST));

	return (0);
}
     87 
/*
 * dsl_sync_task check callback: validate every snapshot in
 * dsda->dsda_snaps.  Names that pass are recorded in
 * dsda_successful_snaps (consumed by the sync callback); names that
 * fail are recorded in dsda_errlist.  Snapshots that do not exist are
 * silently skipped, per the lzc_destroy_snaps() contract.
 *
 * Returns 0 if all (remaining) snapshots may be destroyed, otherwise
 * the errno of the first entry in the errlist.
 */
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int error = 0;

	/* Only perform the real checks in syncing context. */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
		dsl_dataset_t *ds;

		error = dsl_dataset_hold(dp, nvpair_name(pair),
		    FTAG, &ds);

		/*
		 * If the snapshot does not exist, silently ignore it
		 * (it's "already destroyed").
		 */
		if (error == ENOENT)
			continue;

		if (error == 0) {
			error = dsl_destroy_snapshot_check_impl(ds,
			    dsda->dsda_defer);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error == 0) {
			fnvlist_add_boolean(dsda->dsda_successful_snaps,
			    nvpair_name(pair));
		} else {
			fnvlist_add_int32(dsda->dsda_errlist,
			    nvpair_name(pair), error);
		}
	}

	/* Fail the whole task if anything went on the errlist. */
	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
	if (pair != NULL)
		return (fnvpair_value_int32(pair));

	return (0);
}
    134 
/*
 * State shared with process_old_cb() while walking an old-format
 * (pre-SPA_VERSION_DEADLISTS) deadlist in process_old_deadlist().
 */
struct process_old_arg {
	dsl_dataset_t *ds;		/* snapshot being destroyed */
	dsl_dataset_t *ds_prev;		/* its previous snapshot, or NULL */
	boolean_t after_branch_point;	/* ds is not prev's direct "next" */
	zio_t *pio;			/* root zio for async frees */
	/* accumulated sizes of blocks freed outright (not kept on list) */
	uint64_t used, comp, uncomp;
};
    142 
/*
 * bpobj_iterate() callback for each block pointer on the next
 * snapshot's old-format deadlist.
 *
 * Blocks born at or before ds's previous snapshot are still referenced
 * by older snapshots, so they move onto ds's own deadlist (and, if
 * uniquely held by ds_prev now, are credited to its unique bytes).
 * Blocks born after that point died entirely within ds's lifetime and
 * can be freed immediately; their sizes accumulate in poa for the
 * caller's space accounting.  Always returns 0 (never aborts the walk).
 */
static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	ASSERT(!BP_IS_HOLE(bp));

	if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    bp->blk_birth >
		    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
			/* Block is now unique to the previous snapshot. */
			dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}
	return (0);
}
    167 
/*
 * Handle deadlist processing for a snapshot destroy when the deadlists
 * are in the old (pre-SPA_VERSION_DEADLISTS) format, which cannot be
 * merged or range-queried.  Walks ds_next's deadlist, sorting each
 * entry onto ds's deadlist or freeing it (see process_old_cb()),
 * adjusts the snapused accounting, and finally swaps the two datasets'
 * deadlist objects so that ds_next inherits the combined list and ds is
 * left holding the (now consumed) one that will be freed by the caller.
 */
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t deadlist_obj;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	/* Wait for all the frees issued by process_old_cb() to complete. */
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
	dsl_dataset_phys(ds)->ds_deadlist_obj =
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj;
	dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
	dsl_deadlist_open(&ds->ds_deadlist, mos,
	    dsl_dataset_phys(ds)->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}
    205 
/*
 * Remove deadlist key "mintxg" from every clone that branched off this
 * dataset's lineage after that txg, recursing into clones of clones.
 * Called when the snapshot created at mintxg goes away, so clone
 * deadlists no longer need an entry bracketing it.
 */
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;

	/*
	 * If it is the old version, dd_clones doesn't exist so we can't
	 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
	 * doesn't matter.
	 */
	if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
		return;

	for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;

		/* za_first_integer is the clone's dataset object number. */
		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);
			dsl_dataset_remove_clones_key(clone, mintxg, tx);
		}
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
}
    237 
    238 void
    239 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
    240 {
    241 	int err;
    242 	int after_branch_point = FALSE;
    243 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
    244 	objset_t *mos = dp->dp_meta_objset;
    245 	dsl_dataset_t *ds_prev = NULL;
    246 	uint64_t obj;
    247 
    248 	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
    249 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
    250 	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
    251 	rrw_exit(&ds->ds_bp_rwlock, FTAG);
    252 	ASSERT(refcount_is_zero(&ds->ds_longholds));
    253 
    254 	if (defer &&
    255 	    (ds->ds_userrefs > 0 ||
    256 	    dsl_dataset_phys(ds)->ds_num_children > 1)) {
    257 		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
    258 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
    259 		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
    260 		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
    261 		return;
    262 	}
    263 
    264 	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
    265 
    266 	/* We need to log before removing it from the namespace. */
    267 	spa_history_log_internal_ds(ds, "destroy", tx, "");
    268 
    269 	dsl_scan_ds_destroyed(ds, tx);
    270 
    271 	obj = ds->ds_object;
    272 
    273 	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
    274 		if (ds->ds_feature_inuse[f]) {
    275 			dsl_dataset_deactivate_feature(obj, f, tx);
    276 			ds->ds_feature_inuse[f] = B_FALSE;
    277 		}
    278 	}
    279 	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
    280 		ASSERT3P(ds->ds_prev, ==, NULL);
    281 		VERIFY0(dsl_dataset_hold_obj(dp,
    282 		    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
    283 		after_branch_point =
    284 		    (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);
    285 
    286 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
    287 		if (after_branch_point &&
    288 		    dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
    289 			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
    290 			if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
    291 				VERIFY0(zap_add_int(mos,
    292 				    dsl_dataset_phys(ds_prev)->
    293 				    ds_next_clones_obj,
    294 				    dsl_dataset_phys(ds)->ds_next_snap_obj,
    295 				    tx));
    296 			}
    297 		}
    298 		if (!after_branch_point) {
    299 			dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
    300 			    dsl_dataset_phys(ds)->ds_next_snap_obj;
    301 		}
    302 	}
    303 
    304 	dsl_dataset_t *ds_next;
    305 	uint64_t old_unique;
    306 	uint64_t used = 0, comp = 0, uncomp = 0;
    307 
    308 	VERIFY0(dsl_dataset_hold_obj(dp,
    309 	    dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
    310 	ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);
    311 
    312 	old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;
    313 
    314 	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
    315 	dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
    316 	    dsl_dataset_phys(ds)->ds_prev_snap_obj;
    317 	dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
    318 	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
    319 	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
    320 	    ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);
    321 
    322 	if (ds_next->ds_deadlist.dl_oldfmt) {
    323 		process_old_deadlist(ds, ds_prev, ds_next,
    324 		    after_branch_point, tx);
    325 	} else {
    326 		/* Adjust prev's unique space. */
    327 		if (ds_prev && !after_branch_point) {
    328 			dsl_deadlist_space_range(&ds_next->ds_deadlist,
    329 			    dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
    330 			    dsl_dataset_phys(ds)->ds_prev_snap_txg,
    331 			    &used, &comp, &uncomp);
    332 			dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
    333 		}
    334 
    335 		/* Adjust snapused. */
    336 		dsl_deadlist_space_range(&ds_next->ds_deadlist,
    337 		    dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
    338 		    &used, &comp, &uncomp);
    339 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
    340 		    -used, -comp, -uncomp, tx);
    341 
    342 		/* Move blocks to be freed to pool's free list. */
    343 		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
    344 		    &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
    345 		    tx);
    346 		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
    347 		    DD_USED_HEAD, used, comp, uncomp, tx);
    348 
    349 		/* Merge our deadlist into next's and free it. */
    350 		dsl_deadlist_merge(&ds_next->ds_deadlist,
    351 		    dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
    352 	}
    353 	dsl_deadlist_close(&ds->ds_deadlist);
    354 	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
    355 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
    356 	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
    357 
    358 	/* Collapse range in clone heads */
    359 	dsl_dataset_remove_clones_key(ds,
    360 	    dsl_dataset_phys(ds)->ds_creation_txg, tx);
    361 
    362 	if (ds_next->ds_is_snapshot) {
    363 		dsl_dataset_t *ds_nextnext;
    364 
    365 		/*
    366 		 * Update next's unique to include blocks which
    367 		 * were previously shared by only this snapshot
    368 		 * and it.  Those blocks will be born after the
    369 		 * prev snap and before this snap, and will have
    370 		 * died after the next snap and before the one
    371 		 * after that (ie. be on the snap after next's
    372 		 * deadlist).
    373 		 */
    374 		VERIFY0(dsl_dataset_hold_obj(dp,
    375 		    dsl_dataset_phys(ds_next)->ds_next_snap_obj,
    376 		    FTAG, &ds_nextnext));
    377 		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
    378 		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
    379 		    dsl_dataset_phys(ds)->ds_creation_txg,
    380 		    &used, &comp, &uncomp);
    381 		dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
    382 		dsl_dataset_rele(ds_nextnext, FTAG);
    383 		ASSERT3P(ds_next->ds_prev, ==, NULL);
    384 
    385 		/* Collapse range in this head. */
    386 		dsl_dataset_t *hds;
    387 		VERIFY0(dsl_dataset_hold_obj(dp,
    388 		    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
    389 		dsl_deadlist_remove_key(&hds->ds_deadlist,
    390 		    dsl_dataset_phys(ds)->ds_creation_txg, tx);
    391 		dsl_dataset_rele(hds, FTAG);
    392 
    393 	} else {
    394 		ASSERT3P(ds_next->ds_prev, ==, ds);
    395 		dsl_dataset_rele(ds_next->ds_prev, ds_next);
    396 		ds_next->ds_prev = NULL;
    397 		if (ds_prev) {
    398 			VERIFY0(dsl_dataset_hold_obj(dp,
    399 			    dsl_dataset_phys(ds)->ds_prev_snap_obj,
    400 			    ds_next, &ds_next->ds_prev));
    401 		}
    402 
    403 		dsl_dataset_recalc_head_uniq(ds_next);
    404 
    405 		/*
    406 		 * Reduce the amount of our unconsumed refreservation
    407 		 * being charged to our parent by the amount of
    408 		 * new unique data we have gained.
    409 		 */
    410 		if (old_unique < ds_next->ds_reserved) {
    411 			int64_t mrsdelta;
    412 			uint64_t new_unique =
    413 			    dsl_dataset_phys(ds_next)->ds_unique_bytes;
    414 
    415 			ASSERT(old_unique <= new_unique);
    416 			mrsdelta = MIN(new_unique - old_unique,
    417 			    ds_next->ds_reserved - old_unique);
    418 			dsl_dir_diduse_space(ds->ds_dir,
    419 			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
    420 		}
    421 	}
    422 	dsl_dataset_rele(ds_next, FTAG);
    423 
    424 	/*
    425 	 * This must be done after the dsl_traverse(), because it will
    426 	 * re-open the objset.
    427 	 */
    428 	if (ds->ds_objset) {
    429 		dmu_objset_evict(ds->ds_objset);
    430 		ds->ds_objset = NULL;
    431 	}
    432 
    433 	/* remove from snapshot namespace */
    434 	dsl_dataset_t *ds_head;
    435 	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
    436 	VERIFY0(dsl_dataset_hold_obj(dp,
    437 	    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
    438 	VERIFY0(dsl_dataset_get_snapname(ds));
    439 #ifdef ZFS_DEBUG
    440 	{
    441 		uint64_t val;
    442 
    443 		err = dsl_dataset_snap_lookup(ds_head,
    444 		    ds->ds_snapname, &val);
    445 		ASSERT0(err);
    446 		ASSERT3U(val, ==, obj);
    447 	}
    448 #endif
    449 	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
    450 	dsl_dataset_rele(ds_head, FTAG);
    451 
    452 	if (ds_prev != NULL)
    453 		dsl_dataset_rele(ds_prev, FTAG);
    454 
    455 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
    456 
    457 	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
    458 		uint64_t count;
    459 		ASSERT0(zap_count(mos,
    460 		    dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
    461 		    count == 0);
    462 		VERIFY0(dmu_object_free(mos,
    463 		    dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
    464 	}
    465 	if (dsl_dataset_phys(ds)->ds_props_obj != 0)
    466 		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
    467 		    tx));
    468 	if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
    469 		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
    470 		    tx));
    471 	dsl_dir_rele(ds->ds_dir, ds);
    472 	ds->ds_dir = NULL;
    473 	dmu_object_free_zapified(mos, obj, tx);
    474 }
    475 
    476 static void
    477 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
    478 {
    479 	dmu_snapshots_destroy_arg_t *dsda = arg;
    480 	dsl_pool_t *dp = dmu_tx_pool(tx);
    481 	nvpair_t *pair;
    482 
    483 	for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
    484 	    pair != NULL;
    485 	    pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
    486 		dsl_dataset_t *ds;
    487 
    488 		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
    489 
    490 		dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
    491 		dsl_dataset_rele(ds, FTAG);
    492 	}
    493 }
    494 
    495 /*
    496  * The semantics of this function are described in the comment above
    497  * lzc_destroy_snaps().  To summarize:
    498  *
    499  * The snapshots must all be in the same pool.
    500  *
    501  * Snapshots that don't exist will be silently ignored (considered to be
    502  * "already deleted").
    503  *
    504  * On success, all snaps will be destroyed and this will return 0.
    505  * On failure, no snaps will be destroyed, the errlist will be filled in,
    506  * and this will return an errno.
    507  */
    508 int
    509 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    510     nvlist_t *errlist)
    511 {
    512 	dmu_snapshots_destroy_arg_t dsda;
    513 	int error;
    514 	nvpair_t *pair;
    515 
    516 	pair = nvlist_next_nvpair(snaps, NULL);
    517 	if (pair == NULL)
    518 		return (0);
    519 
    520 	dsda.dsda_snaps = snaps;
    521 	dsda.dsda_successful_snaps = fnvlist_alloc();
    522 	dsda.dsda_defer = defer;
    523 	dsda.dsda_errlist = errlist;
    524 
    525 	error = dsl_sync_task(nvpair_name(pair),
    526 	    dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
    527 	    &dsda, 0, ZFS_SPACE_CHECK_NONE);
    528 	fnvlist_free(dsda.dsda_successful_snaps);
    529 
    530 	return (error);
    531 }
    532 
    533 int
    534 dsl_destroy_snapshot(const char *name, boolean_t defer)
    535 {
    536 	int error;
    537 	nvlist_t *nvl = fnvlist_alloc();
    538 	nvlist_t *errlist = fnvlist_alloc();
    539 
    540 	fnvlist_add_boolean(nvl, name);
    541 	error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
    542 	fnvlist_free(errlist);
    543 	fnvlist_free(nvl);
    544 	return (error);
    545 }
    546 
/* Context for kill_blkptr() during old_synchronous_dataset_destroy(). */
struct killarg {
	dsl_dataset_t *ds;	/* dataset whose blocks are being killed */
	dmu_tx_t *tx;		/* transaction all frees are charged to */
};
    551 
/*
 * traverse_dataset() callback: free one block pointer belonging to the
 * dataset being synchronously destroyed.  ZIL blocks carry no space
 * accounting and are freed directly; all other blocks go through
 * dsl_dataset_block_kill() so usage is adjusted.  Holes and embedded
 * BPs occupy no space and are skipped.  Always returns 0.
 */
/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
		return (0);

	if (zb->zb_level == ZB_ZIL_LEVEL) {
		ASSERT(zilog != NULL);
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
	} else {
		ASSERT(zilog == NULL);
		/* Blocks born before our origin belong to someone else. */
		ASSERT3U(bp->blk_birth, >,
		    dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
	}

	return (0);
}
    579 
/*
 * Synchronously free all of a head dataset's blocks in this txg.  Used
 * only when SPA_FEATURE_ASYNC_DESTROY is not enabled; otherwise the
 * bptree-based background destroy path is taken instead.
 */
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka;

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone)
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	VERIFY0(traverse_dataset(ds,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka));
	/* Everything unique to this dataset should now be gone. */
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
	    dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}
    600 
/* Argument for the dsl_destroy_head() check/sync callbacks. */
typedef struct dsl_destroy_head_arg {
	const char *ddha_name;	/* name of the filesystem to destroy */
} dsl_destroy_head_arg_t;
    604 
/*
 * Check whether the head (non-snapshot) dataset "ds" may be destroyed.
 *
 * expected_holds is the number of long holds the caller itself has on
 * "ds"; any additional hold means someone else is using the dataset.
 *
 * Returns 0 on success, or:
 *   EINVAL - "ds" is a snapshot, not a head dataset
 *   EBUSY  - unexpected long holds; snapshots of this fs still exist;
 *            or the defer-destroyed origin we must also reap is held
 *   EEXIST - the filesystem has child filesystems
 */
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
	int error;
	uint64_t count;
	objset_t *mos;

	ASSERT(!ds->ds_is_snapshot);
	if (ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (refcount_count(&ds->ds_longholds) != expected_holds)
		return (SET_ERROR(EBUSY));

	mos = ds->ds_dir->dd_pool->dp_meta_objset;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete if there are children of this fs.
	 */
	error = zap_count(mos,
	    dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
	if (error != 0)
		return (error);
	if (count != 0)
		return (SET_ERROR(EEXIST));

	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0) {
		/* We need to remove the origin snapshot as well. */
		if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
			return (SET_ERROR(EBUSY));
	}
	return (0);
}
    649 
    650 static int
    651 dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
    652 {
    653 	dsl_destroy_head_arg_t *ddha = arg;
    654 	dsl_pool_t *dp = dmu_tx_pool(tx);
    655 	dsl_dataset_t *ds;
    656 	int error;
    657 
    658 	error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
    659 	if (error != 0)
    660 		return (error);
    661 
    662 	error = dsl_destroy_head_check_impl(ds, 0);
    663 	dsl_dataset_rele(ds, FTAG);
    664 	return (error);
    665 }
    666 
/*
 * Free the (now empty) dsl_dir with object number "ddobj" in syncing
 * context: adjust filesystem counts, drop its reservation, destroy its
 * child/props/delegation zaps, unlink it from its parent's child zap,
 * and free the on-disk object.  Its head dataset must already be gone.
 */
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	dd_used_t t;

	ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));

	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	/* The head dataset must have been destroyed already. */
	ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);

	/*
	 * Decrement the filesystem count for all parent filesystems.
	 *
	 * When we receive an incremental stream into a filesystem that already
	 * exists, a temporary clone is created.  We never count this temporary
	 * clone, whose name begins with a '%'.
	 */
	if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL)
		dsl_fs_ss_count_adjust(dd->dd_parent, -1,
		    DD_FIELD_FILESYSTEM_COUNT, tx);

	/*
	 * Remove our reservation. The impl() routine avoids setting the
	 * actual property, which would require the (already destroyed) ds.
	 */
	dsl_dir_set_reservation_sync_impl(dd, 0, tx);

	/* All space charged to this dir should have been released. */
	ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
	ASSERT0(dsl_dir_phys(dd)->dd_reserved);
	for (t = 0; t < DD_USED_NUM; t++)
		ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);

	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
	VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
	VERIFY0(zap_remove(mos,
	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
	    dd->dd_myname, tx));

	dsl_dir_rele(dd, FTAG);
	dmu_object_free_zapified(mos, ddobj, tx);
}
    713 
/*
 * Destroy the head (non-snapshot) dataset "ds" in syncing context.
 *
 * Removes the dataset's reservation, detaches it from its origin (if
 * it is a clone), destroys its deadlist, then frees its blocks either
 * synchronously (old pools) or by handing the whole bptree to the
 * async-destroy machinery.  Finally the dataset object, its snapnames
 * zap, any bookmarks, and the containing dsl_dir are freed.  If the
 * origin snapshot was defer-destroyed and we were its last clone, the
 * origin is destroyed as well.
 *
 * Caller must hold the pool config lock as writer; the dataset must
 * have passed dsl_destroy_head_check_impl().
 */
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	uint64_t obj, ddobj, prevobj = 0;
	boolean_t rmorigin;

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	/*
	 * Decide now (before we drop ds_prev) whether the origin snapshot
	 * must be reaped too: it was defer-destroyed, we are its last
	 * remaining clone, and it has no user holds.
	 */
	rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0);

	/* Remove our reservation. */
	if (ds->ds_reserved != 0) {
		dsl_dataset_set_refreservation_sync_impl(ds,
		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
		    0, tx);
		ASSERT0(ds->ds_reserved);
	}

	obj = ds->ds_object;

	/* Release per-dataset feature refcounts held by this dataset. */
	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (ds->ds_feature_inuse[f]) {
			dsl_dataset_deactivate_feature(obj, f, tx);
			ds->ds_feature_inuse[f] = B_FALSE;
		}
	}

	dsl_scan_ds_destroyed(ds, tx);

	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		/* This is a clone */
		ASSERT(ds->ds_prev != NULL);
		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
		    obj);
		ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);

		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    obj, tx);
		}

		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
		dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
	}

	/*
	 * Destroy the deadlist.  Unless it's a clone, the
	 * deadlist should be empty.  (If it's a clone, it's
	 * safe to ignore the deadlist contents.)
	 */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	objset_t *os;
	VERIFY0(dmu_objset_from_ds(ds, &os));

	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
		/* Free every block in this txg (old, slow path). */
		old_synchronous_dataset_destroy(ds, tx);
	} else {
		/*
		 * Move the bptree into the pool's list of trees to
		 * clean up and update space accounting information.
		 */
		uint64_t used, comp, uncomp;

		zil_destroy_sync(dmu_objset_zil(os), tx);

		if (!spa_feature_is_active(dp->dp_spa,
		    SPA_FEATURE_ASYNC_DESTROY)) {
			/* First async destroy: set up the pool's bptree. */
			dsl_scan_t *scn = dp->dp_scan;
			spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
			    tx);
			dp->dp_bptree_obj = bptree_alloc(mos, tx);
			VERIFY0(zap_add(mos,
			    DMU_POOL_DIRECTORY_OBJECT,
			    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
			    &dp->dp_bptree_obj, tx));
			ASSERT(!scn->scn_async_destroying);
			scn->scn_async_destroying = B_TRUE;
		}

		used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
		comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
		uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;

		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    dsl_dataset_phys(ds)->ds_unique_bytes == used);

		/* Hand the whole dataset tree to background reclaim. */
		rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
		bptree_add(mos, dp->dp_bptree_obj,
		    &dsl_dataset_phys(ds)->ds_bp,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    used, comp, uncomp, tx);
		rrw_exit(&ds->ds_bp_rwlock, FTAG);
		/* Account the space as "being freed" under $FREE. */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    -used, -comp, -uncomp, tx);
		dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
		    used, comp, uncomp, tx);
	}

	if (ds->ds_prev != NULL) {
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			/* Drop ourselves from the origin dir's clone list. */
			VERIFY0(zap_remove_int(mos,
			    dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
			    ds->ds_object, tx));
		}
		prevobj = ds->ds_prev->ds_object;
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* Erase the link in the dir */
	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
	dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
	ddobj = ds->ds_dir->dd_object;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
	VERIFY0(zap_destroy(mos,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));

	if (ds->ds_bookmarks != 0) {
		VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
	}

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	/* Head datasets never carry these snapshot-only objects. */
	ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);

	dsl_dir_destroy_sync(ddobj, tx);

	if (rmorigin) {
		/* We were the last clone; reap the deferred origin too. */
		dsl_dataset_t *prev;
		VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
		dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
		dsl_dataset_rele(prev, FTAG);
	}
}
    882 
    883 static void
    884 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
    885 {
    886 	dsl_destroy_head_arg_t *ddha = arg;
    887 	dsl_pool_t *dp = dmu_tx_pool(tx);
    888 	dsl_dataset_t *ds;
    889 
    890 	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
    891 	dsl_destroy_head_sync_impl(ds, tx);
    892 	dsl_dataset_rele(ds, FTAG);
    893 }
    894 
    895 static void
    896 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
    897 {
    898 	dsl_destroy_head_arg_t *ddha = arg;
    899 	dsl_pool_t *dp = dmu_tx_pool(tx);
    900 	dsl_dataset_t *ds;
    901 
    902 	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
    903 
    904 	/* Mark it as inconsistent on-disk, in case we crash */
    905 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
    906 	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
    907 
    908 	spa_history_log_internal_ds(ds, "destroy begin", tx, "");
    909 	dsl_dataset_rele(ds, FTAG);
    910 }
    911 
    912 int
    913 dsl_destroy_head(const char *name)
    914 {
    915 	dsl_destroy_head_arg_t ddha;
    916 	int error;
    917 	spa_t *spa;
    918 	boolean_t isenabled;
    919 
    920 #ifdef _KERNEL
    921 	zfs_destroy_unmount_origin(name);
    922 #endif
    923 
    924 	error = spa_open(name, &spa, FTAG);
    925 	if (error != 0)
    926 		return (error);
    927 	isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
    928 	spa_close(spa, FTAG);
    929 
    930 	ddha.ddha_name = name;
    931 
    932 	if (!isenabled) {
    933 		objset_t *os;
    934 
    935 		error = dsl_sync_task(name, dsl_destroy_head_check,
    936 		    dsl_destroy_head_begin_sync, &ddha,
    937 		    0, ZFS_SPACE_CHECK_NONE);
    938 		if (error != 0)
    939 			return (error);
    940 
    941 		/*
    942 		 * Head deletion is processed in one txg on old pools;
    943 		 * remove the objects from open context so that the txg sync
    944 		 * is not too long.
    945 		 */
    946 		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
    947 		if (error == 0) {
    948 			uint64_t prev_snap_txg =
    949 			    dsl_dataset_phys(dmu_objset_ds(os))->
    950 			    ds_prev_snap_txg;
    951 			for (uint64_t obj = 0; error == 0;
    952 			    error = dmu_object_next(os, &obj, FALSE,
    953 			    prev_snap_txg))
    954 				(void) dmu_free_long_object(os, obj);
    955 			/* sync out all frees */
    956 			txg_wait_synced(dmu_objset_pool(os), 0);
    957 			dmu_objset_disown(os, FTAG);
    958 		}
    959 	}
    960 
    961 	return (dsl_sync_task(name, dsl_destroy_head_check,
    962 	    dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
    963 }
    964 
    965 /*
    966  * Note, this function is used as the callback for dmu_objset_find().  We
    967  * always return 0 so that we will continue to find and process
    968  * inconsistent datasets, even if we encounter an error trying to
    969  * process one of them.
    970  */
    971 /* ARGSUSED */
    972 int
    973 dsl_destroy_inconsistent(const char *dsname, void *arg)
    974 {
    975 	objset_t *os;
    976 
    977 	if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
    978 		boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
    979 
    980 		/*
    981 		 * If the dataset is inconsistent because a resumable receive
    982 		 * has failed, then do not destroy it.
    983 		 */
    984 		if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
    985 			need_destroy = B_FALSE;
    986 
    987 		dmu_objset_rele(os, FTAG);
    988 		if (need_destroy)
    989 			(void) dsl_destroy_head(dsname);
    990 	}
    991 	return (0);
    992 }
    993