Home | History | Annotate | Line # | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
     24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
     25  * Copyright (c) 2014 Integros [integros.com]
     26  */
     27 
     28 /*
     29  * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
     30  * It has the following characteristics:
     31  *
     32  *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
     33  *  threads.  This is accomplished primarily by avoiding global data
     34  *  (e.g. caching).  Since it's thread-safe, there is no reason for a
     35  *  process to have multiple libzfs "instances".  Therefore, we store
     36  *  our few pieces of data (e.g. the file descriptor) in global
     37  *  variables.  The fd is reference-counted so that the libzfs_core
     38  *  library can be "initialized" multiple times (e.g. by different
     39  *  consumers within the same process).
     40  *
     41  *  - Committed Interface.  The libzfs_core interface will be committed,
     42  *  therefore consumers can compile against it and be confident that
     43  *  their code will continue to work on future releases of this code.
     44  *  Currently, the interface is Evolving (not Committed), but we intend
     45  *  to commit to it once it is more complete and we determine that it
     46  *  meets the needs of all consumers.
     47  *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
     49  *  defined error numbers, and doesn't print anything to stdout/stderr.
     50  *
     51  *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
     52  *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
     53  *  between libzfs_core functions and ioctls to /dev/zfs.
     54  *
     55  *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
     57  *  libzfs_core function is atomic.  For example, creating multiple
     58  *  snapshots with a single call to lzc_snapshot() is atomic -- it
     59  *  can't fail with only some of the requested snapshots created, even
     60  *  in the event of power loss or system crash.
     61  *
     62  *  - Continued libzfs Support.  Some higher-level operations (e.g.
     63  *  support for "zfs send -R") are too complicated to fit the scope of
     64  *  libzfs_core.  This functionality will continue to live in libzfs.
     65  *  Where appropriate, libzfs will use the underlying atomic operations
     66  *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
     67  *  zfs receive" by using individual "send one snapshot", rename,
     68  *  destroy, and "receive one snapshot" operations in libzfs_core.
 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
     70  *  libzfs_core.  Other consumers should aim to use only libzfs_core,
     71  *  since that will be the supported, stable interface going forwards.
     72  */
     73 
     74 #define _IN_LIBZFS_CORE_
     75 
     76 #include <libzfs_core.h>
     77 #include <ctype.h>
     78 #include <unistd.h>
     79 #include <stdlib.h>
     80 #include <string.h>
     81 #include <errno.h>
     82 #include <fcntl.h>
     83 #include <pthread.h>
     84 #include <sys/nvpair.h>
     85 #include <sys/param.h>
     86 #include <sys/types.h>
     87 #include <sys/stat.h>
     88 #include <sys/zfs_ioctl.h>
     89 #include "libzfs_core_compat.h"
     90 #include "libzfs_compat.h"
     91 
     92 #ifdef __FreeBSD__
     93 extern int zfs_ioctl_version;
     94 #endif
     95 
/*
 * Library-wide state.  g_fd is the descriptor for /dev/zfs, g_refcount
 * counts the outstanding successful libzfs_core_init() calls, and g_lock
 * is held while either of them is updated.
 */
static int g_fd;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;

/*
 * Initialize the library.  The first successful call opens /dev/zfs;
 * later calls only increment the reference count.  Every successful call
 * should be balanced by a call to libzfs_core_fini().
 *
 * Returns 0 on success, or the errno left by open() on failure, in which
 * case the reference count is not changed.
 */
int
libzfs_core_init(void)
{
	int error = 0;

	(void) pthread_mutex_lock(&g_lock);
	if (g_refcount == 0) {
		g_fd = open("/dev/zfs", O_RDWR);
		if (g_fd < 0) {
			/*
			 * Capture errno right away so that nothing executed
			 * before we return (e.g. the unlock below) can
			 * overwrite the reason open() failed.
			 */
			error = errno;
		}
	}
	if (error == 0)
		g_refcount++;
	(void) pthread_mutex_unlock(&g_lock);

	return (error);
}
    116 
    117 void
    118 libzfs_core_fini(void)
    119 {
    120 	(void) pthread_mutex_lock(&g_lock);
    121 	ASSERT3S(g_refcount, >, 0);
    122 	g_refcount--;
    123 	if (g_refcount == 0)
    124 		(void) close(g_fd);
    125 	(void) pthread_mutex_unlock(&g_lock);
    126 }
    127 
    128 static int
    129 lzc_ioctl(zfs_ioc_t ioc, const char *name,
    130     nvlist_t *source, nvlist_t **resultp)
    131 {
    132 	zfs_cmd_t zc = { 0 };
    133 	int error = 0;
    134 	char *packed;
    135 #ifdef __FreeBSD__
    136 	nvlist_t *oldsource;
    137 #endif
    138 	size_t size;
    139 
    140 	ASSERT3S(g_refcount, >, 0);
    141 
    142 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
    143 
    144 #ifdef __FreeBSD__
    145 	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
    146 		zfs_ioctl_version = get_zfs_ioctl_version();
    147 
    148 	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
    149 		oldsource = source;
    150 		error = lzc_compat_pre(&zc, &ioc, &source);
    151 		if (error)
    152 			return (error);
    153 	}
    154 #endif
    155 
    156 	packed = fnvlist_pack(source, &size);
    157 	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
    158 	zc.zc_nvlist_src_size = size;
    159 
    160 	if (resultp != NULL) {
    161 		*resultp = NULL;
    162 		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
    163 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
    164 		    malloc(zc.zc_nvlist_dst_size);
    165 #ifdef illumos
    166 		if (zc.zc_nvlist_dst == NULL) {
    167 #else
    168 		if (zc.zc_nvlist_dst == 0) {
    169 #endif
    170 			error = ENOMEM;
    171 			goto out;
    172 		}
    173 	}
    174 
    175 	while (ioctl(g_fd, ioc, &zc) != 0) {
    176 		if (errno == ENOMEM && resultp != NULL) {
    177 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
    178 			zc.zc_nvlist_dst_size *= 2;
    179 			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
    180 			    malloc(zc.zc_nvlist_dst_size);
    181 #ifdef illumos
    182 			if (zc.zc_nvlist_dst == NULL) {
    183 #else
    184 			if (zc.zc_nvlist_dst == 0) {
    185 #endif
    186 				error = ENOMEM;
    187 				goto out;
    188 			}
    189 		} else {
    190 			error = errno;
    191 			break;
    192 		}
    193 	}
    194 
    195 #ifdef __FreeBSD__
    196 	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
    197 		lzc_compat_post(&zc, ioc);
    198 #endif
    199 	if (zc.zc_nvlist_dst_filled) {
    200 		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
    201 		    zc.zc_nvlist_dst_size);
    202 	}
    203 #ifdef __FreeBSD__
    204 	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
    205 		lzc_compat_outnvl(&zc, ioc, resultp);
    206 #endif
    207 out:
    208 #ifdef __FreeBSD__
    209 	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
    210 		if (source != oldsource)
    211 			nvlist_free(source);
    212 		source = oldsource;
    213 	}
    214 #endif
    215 	fnvlist_pack_free(packed, size);
    216 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
    217 	return (error);
    218 }
    219 
    220 int
    221 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
    222 {
    223 	int error;
    224 	nvlist_t *args = fnvlist_alloc();
    225 	fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
    226 	if (props != NULL)
    227 		fnvlist_add_nvlist(args, "props", props);
    228 	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
    229 	nvlist_free(args);
    230 	return (error);
    231 }
    232 
    233 int
    234 lzc_clone(const char *fsname, const char *origin,
    235     nvlist_t *props)
    236 {
    237 	int error;
    238 	nvlist_t *args = fnvlist_alloc();
    239 	fnvlist_add_string(args, "origin", origin);
    240 	if (props != NULL)
    241 		fnvlist_add_nvlist(args, "props", props);
    242 	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
    243 	nvlist_free(args);
    244 	return (error);
    245 }
    246 
    247 /*
    248  * Creates snapshots.
    249  *
    250  * The keys in the snaps nvlist are the snapshots to be created.
    251  * They must all be in the same pool.
    252  *
    253  * The props nvlist is properties to set.  Currently only user properties
    254  * are supported.  { user:prop_name -> string value }
    255  *
    256  * The returned results nvlist will have an entry for each snapshot that failed.
    257  * The value will be the (int32) error code.
    258  *
    259  * The return value will be 0 if all snapshots were created, otherwise it will
    260  * be the errno of a (unspecified) snapshot that failed.
    261  */
    262 int
    263 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
    264 {
    265 	nvpair_t *elem;
    266 	nvlist_t *args;
    267 	int error;
    268 	char pool[ZFS_MAX_DATASET_NAME_LEN];
    269 
    270 	*errlist = NULL;
    271 
    272 	/* determine the pool name */
    273 	elem = nvlist_next_nvpair(snaps, NULL);
    274 	if (elem == NULL)
    275 		return (0);
    276 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
    277 	pool[strcspn(pool, "/@")] = '\0';
    278 
    279 	args = fnvlist_alloc();
    280 	fnvlist_add_nvlist(args, "snaps", snaps);
    281 	if (props != NULL)
    282 		fnvlist_add_nvlist(args, "props", props);
    283 
    284 	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
    285 	nvlist_free(args);
    286 
    287 	return (error);
    288 }
    289 
    290 /*
    291  * Destroys snapshots.
    292  *
    293  * The keys in the snaps nvlist are the snapshots to be destroyed.
    294  * They must all be in the same pool.
    295  *
    296  * Snapshots that do not exist will be silently ignored.
    297  *
    298  * If 'defer' is not set, and a snapshot has user holds or clones, the
    299  * destroy operation will fail and none of the snapshots will be
    300  * destroyed.
    301  *
    302  * If 'defer' is set, and a snapshot has user holds or clones, it will be
    303  * marked for deferred destruction, and will be destroyed when the last hold
    304  * or clone is removed/destroyed.
    305  *
    306  * The return value will be 0 if all snapshots were destroyed (or marked for
    307  * later destruction if 'defer' is set) or didn't exist to begin with.
    308  *
    309  * Otherwise the return value will be the errno of a (unspecified) snapshot
    310  * that failed, no snapshots will be destroyed, and the errlist will have an
    311  * entry for each snapshot that failed.  The value in the errlist will be
    312  * the (int32) error code.
    313  */
    314 int
    315 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
    316 {
    317 	nvpair_t *elem;
    318 	nvlist_t *args;
    319 	int error;
    320 	char pool[ZFS_MAX_DATASET_NAME_LEN];
    321 
    322 	/* determine the pool name */
    323 	elem = nvlist_next_nvpair(snaps, NULL);
    324 	if (elem == NULL)
    325 		return (0);
    326 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
    327 	pool[strcspn(pool, "/@")] = '\0';
    328 
    329 	args = fnvlist_alloc();
    330 	fnvlist_add_nvlist(args, "snaps", snaps);
    331 	if (defer)
    332 		fnvlist_add_boolean(args, "defer");
    333 
    334 	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
    335 	nvlist_free(args);
    336 
    337 	return (error);
    338 }
    339 
    340 int
    341 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
    342     uint64_t *usedp)
    343 {
    344 	nvlist_t *args;
    345 	nvlist_t *result;
    346 	int err;
    347 	char fs[ZFS_MAX_DATASET_NAME_LEN];
    348 	char *atp;
    349 
    350 	/* determine the fs name */
    351 	(void) strlcpy(fs, firstsnap, sizeof (fs));
    352 	atp = strchr(fs, '@');
    353 	if (atp == NULL)
    354 		return (EINVAL);
    355 	*atp = '\0';
    356 
    357 	args = fnvlist_alloc();
    358 	fnvlist_add_string(args, "firstsnap", firstsnap);
    359 
    360 	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
    361 	nvlist_free(args);
    362 	if (err == 0)
    363 		*usedp = fnvlist_lookup_uint64(result, "used");
    364 	fnvlist_free(result);
    365 
    366 	return (err);
    367 }
    368 
    369 boolean_t
    370 lzc_exists(const char *dataset)
    371 {
    372 	/*
    373 	 * The objset_stats ioctl is still legacy, so we need to construct our
    374 	 * own zfs_cmd_t rather than using zfsc_ioctl().
    375 	 */
    376 	zfs_cmd_t zc = { 0 };
    377 
    378 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
    379 	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
    380 }
    381 
    382 /*
    383  * Create "user holds" on snapshots.  If there is a hold on a snapshot,
    384  * the snapshot can not be destroyed.  (However, it can be marked for deletion
    385  * by lzc_destroy_snaps(defer=B_TRUE).)
    386  *
    387  * The keys in the nvlist are snapshot names.
    388  * The snapshots must all be in the same pool.
    389  * The value is the name of the hold (string type).
    390  *
    391  * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
    392  * In this case, when the cleanup_fd is closed (including on process
    393  * termination), the holds will be released.  If the system is shut down
    394  * uncleanly, the holds will be released when the pool is next opened
    395  * or imported.
    396  *
    397  * Holds for snapshots which don't exist will be skipped and have an entry
    398  * added to errlist, but will not cause an overall failure.
    399  *
    400  * The return value will be 0 if all holds, for snapshots that existed,
    401  * were succesfully created.
    402  *
    403  * Otherwise the return value will be the errno of a (unspecified) hold that
    404  * failed and no holds will be created.
    405  *
    406  * In all cases the errlist will have an entry for each hold that failed
    407  * (name = snapshot), with its value being the error code (int32).
    408  */
    409 int
    410 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
    411 {
    412 	char pool[ZFS_MAX_DATASET_NAME_LEN];
    413 	nvlist_t *args;
    414 	nvpair_t *elem;
    415 	int error;
    416 
    417 	/* determine the pool name */
    418 	elem = nvlist_next_nvpair(holds, NULL);
    419 	if (elem == NULL)
    420 		return (0);
    421 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
    422 	pool[strcspn(pool, "/@")] = '\0';
    423 
    424 	args = fnvlist_alloc();
    425 	fnvlist_add_nvlist(args, "holds", holds);
    426 	if (cleanup_fd != -1)
    427 		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
    428 
    429 	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
    430 	nvlist_free(args);
    431 	return (error);
    432 }
    433 
    434 /*
    435  * Release "user holds" on snapshots.  If the snapshot has been marked for
    436  * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
    437  * any clones, and all the user holds are removed, then the snapshot will be
    438  * destroyed.
    439  *
    440  * The keys in the nvlist are snapshot names.
    441  * The snapshots must all be in the same pool.
    442  * The value is a nvlist whose keys are the holds to remove.
    443  *
    444  * Holds which failed to release because they didn't exist will have an entry
    445  * added to errlist, but will not cause an overall failure.
    446  *
    447  * The return value will be 0 if the nvl holds was empty or all holds that
    448  * existed, were successfully removed.
    449  *
    450  * Otherwise the return value will be the errno of a (unspecified) hold that
    451  * failed to release and no holds will be released.
    452  *
    453  * In all cases the errlist will have an entry for each hold that failed to
    454  * to release.
    455  */
    456 int
    457 lzc_release(nvlist_t *holds, nvlist_t **errlist)
    458 {
    459 	char pool[ZFS_MAX_DATASET_NAME_LEN];
    460 	nvpair_t *elem;
    461 
    462 	/* determine the pool name */
    463 	elem = nvlist_next_nvpair(holds, NULL);
    464 	if (elem == NULL)
    465 		return (0);
    466 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
    467 	pool[strcspn(pool, "/@")] = '\0';
    468 
    469 	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
    470 }
    471 
    472 /*
    473  * Retrieve list of user holds on the specified snapshot.
    474  *
    475  * On success, *holdsp will be set to a nvlist which the caller must free.
    476  * The keys are the names of the holds, and the value is the creation time
    477  * of the hold (uint64) in seconds since the epoch.
    478  */
    479 int
    480 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
    481 {
    482 	int error;
    483 	nvlist_t *innvl = fnvlist_alloc();
    484 	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
    485 	fnvlist_free(innvl);
    486 	return (error);
    487 }
    488 
    489 /*
    490  * Generate a zfs send stream for the specified snapshot and write it to
    491  * the specified file descriptor.
    492  *
    493  * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
    494  *
    495  * If "from" is NULL, a full (non-incremental) stream will be sent.
    496  * If "from" is non-NULL, it must be the full name of a snapshot or
    497  * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
    498  * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
    499  * bookmark must represent an earlier point in the history of "snapname").
    500  * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
    501  * or it can be the origin of "snapname"'s filesystem, or an earlier
    502  * snapshot in the origin, etc.
    503  *
    504  * "fd" is the file descriptor to write the send stream to.
    505  *
    506  * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
    507  * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
    508  * records with drr_blksz > 128K.
    509  *
    510  * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
    511  * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
    512  * which the receiving system must support (as indicated by support
    513  * for the "embedded_data" feature).
    514  */
    515 int
    516 lzc_send(const char *snapname, const char *from, int fd,
    517     enum lzc_send_flags flags)
    518 {
    519 	return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
    520 }
    521 
    522 int
    523 lzc_send_resume(const char *snapname, const char *from, int fd,
    524     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
    525 {
    526 	nvlist_t *args;
    527 	int err;
    528 
    529 	args = fnvlist_alloc();
    530 	fnvlist_add_int32(args, "fd", fd);
    531 	if (from != NULL)
    532 		fnvlist_add_string(args, "fromsnap", from);
    533 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
    534 		fnvlist_add_boolean(args, "largeblockok");
    535 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
    536 		fnvlist_add_boolean(args, "embedok");
    537 	if (resumeobj != 0 || resumeoff != 0) {
    538 		fnvlist_add_uint64(args, "resume_object", resumeobj);
    539 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
    540 	}
    541 	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
    542 	nvlist_free(args);
    543 	return (err);
    544 }
    545 
    546 /*
    547  * "from" can be NULL, a snapshot, or a bookmark.
    548  *
    549  * If from is NULL, a full (non-incremental) stream will be estimated.  This
    550  * is calculated very efficiently.
    551  *
    552  * If from is a snapshot, lzc_send_space uses the deadlists attached to
    553  * each snapshot to efficiently estimate the stream size.
    554  *
    555  * If from is a bookmark, the indirect blocks in the destination snapshot
    556  * are traversed, looking for blocks with a birth time since the creation TXG of
    557  * the snapshot this bookmark was created from.  This will result in
    558  * significantly more I/O and be less efficient than a send space estimation on
    559  * an equivalent snapshot.
    560  */
    561 int
    562 lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
    563 {
    564 	nvlist_t *args;
    565 	nvlist_t *result;
    566 	int err;
    567 
    568 	args = fnvlist_alloc();
    569 	if (from != NULL)
    570 		fnvlist_add_string(args, "from", from);
    571 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
    572 	nvlist_free(args);
    573 	if (err == 0)
    574 		*spacep = fnvlist_lookup_uint64(result, "space");
    575 	nvlist_free(result);
    576 	return (err);
    577 }
    578 
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short
 * reads and retrying reads interrupted by a signal.
 *
 * Returns 0 once all 'ilen' bytes have been read (trivially so for
 * ilen == 0).  Returns EIO if 'ilen' is negative, if read() fails for a
 * reason other than EINTR, or if end-of-file is reached first.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	if (ilen < 0)
		return (EIO);

	while (len > 0) {
		ssize_t rv = read(fd, cp, (size_t)len);

		if (rv < 0) {
			/* An interrupted read transferred nothing; retry. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0) {
			/* EOF before the buffer was filled. */
			return (EIO);
		}

		/* rv <= len, so it fits in an int. */
		cp += rv;
		len -= (int)rv;
	}

	return (0);
}
    597 
    598 static int
    599 recv_impl(const char *snapname, nvlist_t *props, const char *origin,
    600     boolean_t force, boolean_t resumable, int fd,
    601     const dmu_replay_record_t *begin_record)
    602 {
    603 	/*
    604 	 * The receive ioctl is still legacy, so we need to construct our own
    605 	 * zfs_cmd_t rather than using zfsc_ioctl().
    606 	 */
    607 	zfs_cmd_t zc = { 0 };
    608 	char *atp;
    609 	char *packed = NULL;
    610 	size_t size;
    611 	int error;
    612 
    613 	ASSERT3S(g_refcount, >, 0);
    614 
    615 	/* zc_name is name of containing filesystem */
    616 	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
    617 	atp = strchr(zc.zc_name, '@');
    618 	if (atp == NULL)
    619 		return (EINVAL);
    620 	*atp = '\0';
    621 
    622 	/* if the fs does not exist, try its parent. */
    623 	if (!lzc_exists(zc.zc_name)) {
    624 		char *slashp = strrchr(zc.zc_name, '/');
    625 		if (slashp == NULL)
    626 			return (ENOENT);
    627 		*slashp = '\0';
    628 
    629 	}
    630 
    631 	/* zc_value is full name of the snapshot to create */
    632 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
    633 
    634 	if (props != NULL) {
    635 		/* zc_nvlist_src is props to set */
    636 		packed = fnvlist_pack(props, &size);
    637 		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
    638 		zc.zc_nvlist_src_size = size;
    639 	}
    640 
    641 	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
    642 	if (origin != NULL)
    643 		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
    644 
    645 	/* zc_begin_record is non-byteswapped BEGIN record */
    646 	if (begin_record == NULL) {
    647 		error = recv_read(fd, &zc.zc_begin_record,
    648 		    sizeof (zc.zc_begin_record));
    649 		if (error != 0)
    650 			goto out;
    651 	} else {
    652 		zc.zc_begin_record = *begin_record;
    653 	}
    654 
    655 	/* zc_cookie is fd to read from */
    656 	zc.zc_cookie = fd;
    657 
    658 	/* zc guid is force flag */
    659 	zc.zc_guid = force;
    660 
    661 	zc.zc_resumable = resumable;
    662 
    663 	/* zc_cleanup_fd is unused */
    664 	zc.zc_cleanup_fd = -1;
    665 
    666 	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
    667 	if (error != 0)
    668 		error = errno;
    669 
    670 out:
    671 	if (packed != NULL)
    672 		fnvlist_pack_free(packed, size);
    673 	free((void*)(uintptr_t)zc.zc_nvlist_dst);
    674 	return (error);
    675 }
    676 
    677 /*
    678  * The simplest receive case: receive from the specified fd, creating the
    679  * specified snapshot.  Apply the specified properties as "received" properties
    680  * (which can be overridden by locally-set properties).  If the stream is a
    681  * clone, its origin snapshot must be specified by 'origin'.  The 'force'
    682  * flag will cause the target filesystem to be rolled back or destroyed if
    683  * necessary to receive.
    684  *
    685  * Return 0 on success or an errno on failure.
    686  *
    687  * Note: this interface does not work on dedup'd streams
    688  * (those with DMU_BACKUP_FEATURE_DEDUP).
    689  */
    690 int
    691 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
    692     boolean_t force, int fd)
    693 {
    694 	return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL));
    695 }
    696 
    697 /*
    698  * Like lzc_receive, but if the receive fails due to premature stream
    699  * termination, the intermediate state will be preserved on disk.  In this
    700  * case, ECKSUM will be returned.  The receive may subsequently be resumed
    701  * with a resuming send stream generated by lzc_send_resume().
    702  */
    703 int
    704 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
    705     boolean_t force, int fd)
    706 {
    707 	return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL));
    708 }
    709 
    710 /*
    711  * Like lzc_receive, but allows the caller to read the begin record and then to
    712  * pass it in.  That could be useful if the caller wants to derive, for example,
    713  * the snapname or the origin parameters based on the information contained in
    714  * the begin record.
    715  * The begin record must be in its original form as read from the stream,
    716  * in other words, it should not be byteswapped.
    717  *
    718  * The 'resumable' parameter allows to obtain the same behavior as with
    719  * lzc_receive_resumable.
    720  */
    721 int
    722 lzc_receive_with_header(const char *snapname, nvlist_t *props,
    723     const char *origin, boolean_t force, boolean_t resumable, int fd,
    724     const dmu_replay_record_t *begin_record)
    725 {
    726 	if (begin_record == NULL)
    727 		return (EINVAL);
    728 	return (recv_impl(snapname, props, origin, force, resumable, fd,
    729 	    begin_record));
    730 }
    731 
    732 /*
    733  * Roll back this filesystem or volume to its most recent snapshot.
    734  * If snapnamebuf is not NULL, it will be filled in with the name
    735  * of the most recent snapshot.
    736  *
    737  * Return 0 on success or an errno on failure.
    738  */
    739 int
    740 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
    741 {
    742 	nvlist_t *args;
    743 	nvlist_t *result;
    744 	int err;
    745 
    746 	args = fnvlist_alloc();
    747 	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
    748 	nvlist_free(args);
    749 	if (err == 0 && snapnamebuf != NULL) {
    750 		const char *snapname = fnvlist_lookup_string(result, "target");
    751 		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
    752 	}
    753 	return (err);
    754 }
    755 
    756 /*
    757  * Creates bookmarks.
    758  *
    759  * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
    760  * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
    761  * snapshots must be in the same pool.
    762  *
    763  * The returned results nvlist will have an entry for each bookmark that failed.
    764  * The value will be the (int32) error code.
    765  *
    766  * The return value will be 0 if all bookmarks were created, otherwise it will
    767  * be the errno of a (undetermined) bookmarks that failed.
    768  */
    769 int
    770 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
    771 {
    772 	nvpair_t *elem;
    773 	int error;
    774 	char pool[ZFS_MAX_DATASET_NAME_LEN];
    775 
    776 	/* determine the pool name */
    777 	elem = nvlist_next_nvpair(bookmarks, NULL);
    778 	if (elem == NULL)
    779 		return (0);
    780 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
    781 	pool[strcspn(pool, "/#")] = '\0';
    782 
    783 	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
    784 
    785 	return (error);
    786 }
    787 
    788 /*
    789  * Retrieve bookmarks.
    790  *
    791  * Retrieve the list of bookmarks for the given file system. The props
    792  * parameter is an nvlist of property names (with no values) that will be
    793  * returned for each bookmark.
    794  *
    795  * The following are valid properties on bookmarks, all of which are numbers
    796  * (represented as uint64 in the nvlist)
    797  *
    798  * "guid" - globally unique identifier of the snapshot it refers to
    799  * "createtxg" - txg when the snapshot it refers to was created
    800  * "creation" - timestamp when the snapshot it refers to was created
    801  *
    802  * The format of the returned nvlist as follows:
    803  * <short name of bookmark> -> {
    804  *     <name of property> -> {
    805  *         "value" -> uint64
    806  *     }
    807  *  }
    808  */
    809 int
    810 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
    811 {
    812 	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
    813 }
    814 
    815 /*
    816  * Destroys bookmarks.
    817  *
    818  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
    819  * They must all be in the same pool.  Bookmarks are specified as
    820  * <fs>#<bmark>.
    821  *
    822  * Bookmarks that do not exist will be silently ignored.
    823  *
    824  * The return value will be 0 if all bookmarks that existed were destroyed.
    825  *
    826  * Otherwise the return value will be the errno of a (undetermined) bookmark
    827  * that failed, no bookmarks will be destroyed, and the errlist will have an
    828  * entry for each bookmarks that failed.  The value in the errlist will be
    829  * the (int32) error code.
    830  */
    831 int
    832 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
    833 {
    834 	nvpair_t *elem;
    835 	int error;
    836 	char pool[ZFS_MAX_DATASET_NAME_LEN];
    837 
    838 	/* determine the pool name */
    839 	elem = nvlist_next_nvpair(bmarks, NULL);
    840 	if (elem == NULL)
    841 		return (0);
    842 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
    843 	pool[strcspn(pool, "/#")] = '\0';
    844 
    845 	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
    846 
    847 	return (error);
    848 }
    849