Home | History | Annotate | Line # | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /*
     26  * Copyright (c) 2012 by Delphix. All rights reserved.
     27  */
     28 
     29 /*
     30  * This file contains the code to implement file range locking in
     31  * ZFS, although there isn't much specific to ZFS (all that comes to mind is
     32  * support for growing the blocksize).
     33  *
     34  * Interface
     35  * ---------
     36  * Defined in zfs_rlock.h but essentially:
     37  *	rl = zfs_range_lock(zp, off, len, lock_type);
     38  *	zfs_range_unlock(rl);
     39  *	zfs_range_reduce(rl, off, len);
     40  *
     41  * AVL tree
     42  * --------
     43  * An AVL tree is used to maintain the state of the existing ranges
     44  * that are locked for exclusive (writer) or shared (reader) use.
     45  * The starting range offset is used for searching and sorting the tree.
     46  *
     47  * Common case
     48  * -----------
     49  * The (hopefully) usual case is of no overlaps or contention for
      50  * locks. On entry to zfs_range_lock() an rl_t is allocated; the tree is
      51  * searched, no overlap is found, and *this* rl_t is placed in the tree.
     52  *
     53  * Overlaps/Reference counting/Proxy locks
     54  * ---------------------------------------
     55  * The avl code only allows one node at a particular offset. Also it's very
     56  * inefficient to search through all previous entries looking for overlaps
     57  * (because the very 1st in the ordered list might be at offset 0 but
     58  * cover the whole file).
     59  * So this implementation uses reference counts and proxy range locks.
     60  * Firstly, only reader locks use reference counts and proxy locks,
     61  * because writer locks are exclusive.
     62  * When a reader lock overlaps with another then a proxy lock is created
     63  * for that range and replaces the original lock. If the overlap
     64  * is exact then the reference count of the proxy is simply incremented.
     65  * Otherwise, the proxy lock is split into smaller lock ranges and
     66  * new proxy locks created for non overlapping ranges.
     67  * The reference counts are adjusted accordingly.
      68  * Meanwhile, the original lock is kept around (this is the caller's handle)
     69  * and its offset and length are used when releasing the lock.
     70  *
     71  * Thread coordination
     72  * -------------------
     73  * In order to make wakeups efficient and to ensure multiple continuous
     74  * readers on a range don't starve a writer for the same range lock,
     75  * two condition variables are allocated in each rl_t.
     76  * If a writer (or reader) can't get a range it initialises the writer
     77  * (or reader) cv; sets a flag saying there's a writer (or reader) waiting;
     78  * and waits on that cv. When a thread unlocks that range it wakes up all
     79  * writers then all readers before destroying the lock.
     80  *
     81  * Append mode writes
     82  * ------------------
     83  * Append mode writes need to lock a range at the end of a file.
     84  * The offset of the end of the file is determined under the
     85  * range locking mutex, and the lock type converted from RL_APPEND to
     86  * RL_WRITER and the range locked.
     87  *
     88  * Grow block handling
     89  * -------------------
      90  * ZFS supports multiple block sizes, currently up to 128K. The smallest
     91  * block size is used for the file which is grown as needed. During this
     92  * growth all other writers and readers must be excluded.
     93  * So if the block size needs to be grown then the whole file is
     94  * exclusively locked, then later the caller will reduce the lock
      95  * range to just the range to be written using zfs_range_reduce().
     96  */
     97 
     98 #include <sys/zfs_rlock.h>
     99 
    100 /*
    101  * Check if a write lock can be grabbed, or wait and recheck until available.
    102  */
    103 static boolean_t
    104 zfs_range_lock_writer(znode_t *zp, rl_t *new, boolean_t nonblock)
    105 {
    106 	avl_tree_t *tree = &zp->z_range_avl;
    107 	rl_t *rl;
    108 	avl_index_t where;
    109 	uint64_t end_size;
    110 	uint64_t off = new->r_off;
    111 	uint64_t len = new->r_len;
    112 
    113 	for (;;) {
    114 		/*
    115 		 * Range locking is also used by zvol and uses a
    116 		 * dummied up znode. However, for zvol, we don't need to
    117 		 * append or grow blocksize, and besides we don't have
    118 		 * a "sa" data or z_zfsvfs - so skip that processing.
    119 		 *
    120 		 * Yes, this is ugly, and would be solved by not handling
    121 		 * grow or append in range lock code. If that was done then
    122 		 * we could make the range locking code generically available
    123 		 * to other non-zfs consumers.
    124 		 */
    125 		if (zp->z_vnode) { /* caller is ZPL */
    126 			/*
    127 			 * If in append mode pick up the current end of file.
    128 			 * This is done under z_range_lock to avoid races.
    129 			 */
    130 			if (new->r_type == RL_APPEND)
    131 				new->r_off = zp->z_size;
    132 
    133 			/*
    134 			 * If we need to grow the block size then grab the whole
    135 			 * file range. This is also done under z_range_lock to
    136 			 * avoid races.
    137 			 */
    138 			end_size = MAX(zp->z_size, new->r_off + len);
    139 			if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
    140 			    zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
    141 				new->r_off = 0;
    142 				new->r_len = UINT64_MAX;
    143 			}
    144 		}
    145 
    146 		/*
    147 		 * First check for the usual case of no locks
    148 		 */
    149 		if (avl_numnodes(tree) == 0) {
    150 			new->r_type = RL_WRITER; /* convert to writer */
    151 			avl_add(tree, new);
    152 			return (B_TRUE);
    153 		}
    154 
    155 		/*
    156 		 * Look for any locks in the range.
    157 		 */
    158 		rl = avl_find(tree, new, &where);
    159 		if (rl)
    160 			goto wait; /* already locked at same offset */
    161 
    162 		rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
    163 		if (rl && (rl->r_off < new->r_off + new->r_len))
    164 			goto wait;
    165 
    166 		rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
    167 		if (rl && rl->r_off + rl->r_len > new->r_off)
    168 			goto wait;
    169 
    170 		new->r_type = RL_WRITER; /* convert possible RL_APPEND */
    171 		avl_insert(tree, new, where);
    172 		return (B_TRUE);
    173 wait:
    174 		if (nonblock)
    175 			return (B_FALSE);
    176 		if (!rl->r_write_wanted) {
    177 			cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
    178 			rl->r_write_wanted = B_TRUE;
    179 		}
    180 		cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
    181 
    182 		/* reset to original */
    183 		new->r_off = off;
    184 		new->r_len = len;
    185 	}
    186 }
    187 
    188 /*
    189  * If this is an original (non-proxy) lock then replace it by
    190  * a proxy and return the proxy.
    191  */
    192 static rl_t *
    193 zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
    194 {
    195 	rl_t *proxy;
    196 
    197 	if (rl->r_proxy)
    198 		return (rl); /* already a proxy */
    199 
    200 	ASSERT3U(rl->r_cnt, ==, 1);
    201 	ASSERT(rl->r_write_wanted == B_FALSE);
    202 	ASSERT(rl->r_read_wanted == B_FALSE);
    203 	avl_remove(tree, rl);
    204 	rl->r_cnt = 0;
    205 
    206 	/* create a proxy range lock */
    207 	proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
    208 	proxy->r_off = rl->r_off;
    209 	proxy->r_len = rl->r_len;
    210 	proxy->r_cnt = 1;
    211 	proxy->r_type = RL_READER;
    212 	proxy->r_proxy = B_TRUE;
    213 	proxy->r_write_wanted = B_FALSE;
    214 	proxy->r_read_wanted = B_FALSE;
    215 	avl_add(tree, proxy);
    216 
    217 	return (proxy);
    218 }
    219 
    220 /*
    221  * Split the range lock at the supplied offset
    222  * returning the *front* proxy.
    223  */
    224 static rl_t *
    225 zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
    226 {
    227 	rl_t *front, *rear;
    228 
    229 	ASSERT3U(rl->r_len, >, 1);
    230 	ASSERT3U(off, >, rl->r_off);
    231 	ASSERT3U(off, <, rl->r_off + rl->r_len);
    232 	ASSERT(rl->r_write_wanted == B_FALSE);
    233 	ASSERT(rl->r_read_wanted == B_FALSE);
    234 
    235 	/* create the rear proxy range lock */
    236 	rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
    237 	rear->r_off = off;
    238 	rear->r_len = rl->r_off + rl->r_len - off;
    239 	rear->r_cnt = rl->r_cnt;
    240 	rear->r_type = RL_READER;
    241 	rear->r_proxy = B_TRUE;
    242 	rear->r_write_wanted = B_FALSE;
    243 	rear->r_read_wanted = B_FALSE;
    244 
    245 	front = zfs_range_proxify(tree, rl);
    246 	front->r_len = off - rl->r_off;
    247 
    248 	avl_insert_here(tree, rear, front, AVL_AFTER);
    249 	return (front);
    250 }
    251 
    252 /*
    253  * Create and add a new proxy range lock for the supplied range.
    254  */
    255 static void
    256 zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
    257 {
    258 	rl_t *rl;
    259 
    260 	ASSERT(len);
    261 	rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
    262 	rl->r_off = off;
    263 	rl->r_len = len;
    264 	rl->r_cnt = 1;
    265 	rl->r_type = RL_READER;
    266 	rl->r_proxy = B_TRUE;
    267 	rl->r_write_wanted = B_FALSE;
    268 	rl->r_read_wanted = B_FALSE;
    269 	avl_add(tree, rl);
    270 }
    271 
    272 static void
    273 zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
    274 {
    275 	rl_t *next;
    276 	uint64_t off = new->r_off;
    277 	uint64_t len = new->r_len;
    278 
    279 	/*
    280 	 * prev arrives either:
    281 	 * - pointing to an entry at the same offset
    282 	 * - pointing to the entry with the closest previous offset whose
    283 	 *   range may overlap with the new range
    284 	 * - null, if there were no ranges starting before the new one
    285 	 */
    286 	if (prev) {
    287 		if (prev->r_off + prev->r_len <= off) {
    288 			prev = NULL;
    289 		} else if (prev->r_off != off) {
    290 			/*
    291 			 * convert to proxy if needed then
    292 			 * split this entry and bump ref count
    293 			 */
    294 			prev = zfs_range_split(tree, prev, off);
    295 			prev = AVL_NEXT(tree, prev); /* move to rear range */
    296 		}
    297 	}
    298 	ASSERT((prev == NULL) || (prev->r_off == off));
    299 
    300 	if (prev)
    301 		next = prev;
    302 	else
    303 		next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
    304 
    305 	if (next == NULL || off + len <= next->r_off) {
    306 		/* no overlaps, use the original new rl_t in the tree */
    307 		avl_insert(tree, new, where);
    308 		return;
    309 	}
    310 
    311 	if (off < next->r_off) {
    312 		/* Add a proxy for initial range before the overlap */
    313 		zfs_range_new_proxy(tree, off, next->r_off - off);
    314 	}
    315 
    316 	new->r_cnt = 0; /* will use proxies in tree */
    317 	/*
    318 	 * We now search forward through the ranges, until we go past the end
    319 	 * of the new range. For each entry we make it a proxy if it
    320 	 * isn't already, then bump its reference count. If there's any
    321 	 * gaps between the ranges then we create a new proxy range.
    322 	 */
    323 	for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
    324 		if (off + len <= next->r_off)
    325 			break;
    326 		if (prev && prev->r_off + prev->r_len < next->r_off) {
    327 			/* there's a gap */
    328 			ASSERT3U(next->r_off, >, prev->r_off + prev->r_len);
    329 			zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
    330 			    next->r_off - (prev->r_off + prev->r_len));
    331 		}
    332 		if (off + len == next->r_off + next->r_len) {
    333 			/* exact overlap with end */
    334 			next = zfs_range_proxify(tree, next);
    335 			next->r_cnt++;
    336 			return;
    337 		}
    338 		if (off + len < next->r_off + next->r_len) {
    339 			/* new range ends in the middle of this block */
    340 			next = zfs_range_split(tree, next, off + len);
    341 			next->r_cnt++;
    342 			return;
    343 		}
    344 		ASSERT3U(off + len, >, next->r_off + next->r_len);
    345 		next = zfs_range_proxify(tree, next);
    346 		next->r_cnt++;
    347 	}
    348 
    349 	/* Add the remaining end range. */
    350 	zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
    351 	    (off + len) - (prev->r_off + prev->r_len));
    352 }
    353 
    354 /*
    355  * Check if a reader lock can be grabbed, or wait and recheck until available.
    356  */
    357 static void
    358 zfs_range_lock_reader(znode_t *zp, rl_t *new)
    359 {
    360 	avl_tree_t *tree = &zp->z_range_avl;
    361 	rl_t *prev, *next;
    362 	avl_index_t where;
    363 	uint64_t off = new->r_off;
    364 	uint64_t len = new->r_len;
    365 
    366 	/*
    367 	 * Look for any writer locks in the range.
    368 	 */
    369 retry:
    370 	prev = avl_find(tree, new, &where);
    371 	if (prev == NULL)
    372 		prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
    373 
    374 	/*
    375 	 * Check the previous range for a writer lock overlap.
    376 	 */
    377 	if (prev && (off < prev->r_off + prev->r_len)) {
    378 		if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
    379 			if (!prev->r_read_wanted) {
    380 				cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
    381 				prev->r_read_wanted = B_TRUE;
    382 			}
    383 			cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
    384 			goto retry;
    385 		}
    386 		if (off + len < prev->r_off + prev->r_len)
    387 			goto got_lock;
    388 	}
    389 
    390 	/*
    391 	 * Search through the following ranges to see if there's
    392 	 * write lock any overlap.
    393 	 */
    394 	if (prev)
    395 		next = AVL_NEXT(tree, prev);
    396 	else
    397 		next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
    398 	for (; next; next = AVL_NEXT(tree, next)) {
    399 		if (off + len <= next->r_off)
    400 			goto got_lock;
    401 		if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
    402 			if (!next->r_read_wanted) {
    403 				cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
    404 				next->r_read_wanted = B_TRUE;
    405 			}
    406 			cv_wait(&next->r_rd_cv, &zp->z_range_lock);
    407 			goto retry;
    408 		}
    409 		if (off + len <= next->r_off + next->r_len)
    410 			goto got_lock;
    411 	}
    412 
    413 got_lock:
    414 	/*
    415 	 * Add the read lock, which may involve splitting existing
    416 	 * locks and bumping ref counts (r_cnt).
    417 	 */
    418 	zfs_range_add_reader(tree, new, prev, where);
    419 }
    420 
    421 /*
    422  * Lock a range (offset, length) as either shared (RL_READER)
    423  * or exclusive (RL_WRITER). Returns the range lock structure
    424  * for later unlocking or reduce range (if entire file
    425  * previously locked as RL_WRITER).
    426  */
    427 rl_t *
    428 zfs_range_lock_impl(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type,
    429 	boolean_t nonblock)
    430 {
    431 	rl_t *new;
    432 
    433 	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
    434 
    435 	if (nonblock) {
    436 		new = kmem_alloc(sizeof (rl_t), KM_NOSLEEP);
    437 		if (new == NULL) {
    438 			return NULL;
    439 		}
    440 	} else {
    441 		new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
    442 	}
    443 	new->r_zp = zp;
    444 	new->r_off = off;
    445 	if (len + off < off)	/* overflow */
    446 		len = UINT64_MAX - off;
    447 	new->r_len = len;
    448 	new->r_cnt = 1; /* assume it's going to be in the tree */
    449 	new->r_type = type;
    450 	new->r_proxy = B_FALSE;
    451 	new->r_write_wanted = B_FALSE;
    452 	new->r_read_wanted = B_FALSE;
    453 
    454 	mutex_enter(&zp->z_range_lock);
    455 	if (type == RL_READER) {
    456 		ASSERT(!nonblock); /* XXXNETBSD not implemented */
    457 		/*
    458 		 * First check for the usual case of no locks
    459 		 */
    460 		if (avl_numnodes(&zp->z_range_avl) == 0)
    461 			avl_add(&zp->z_range_avl, new);
    462 		else
    463 			zfs_range_lock_reader(zp, new);
    464 	} else {
    465 		/* RL_WRITER or RL_APPEND */
    466 		if (!zfs_range_lock_writer(zp, new, nonblock)) {
    467 			kmem_free(new, sizeof (*new));
    468 			new = NULL;
    469 		}
    470 	}
    471 	mutex_exit(&zp->z_range_lock);
    472 	return (new);
    473 }
    474 
    475 rl_t *
    476 zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
    477 {
    478 	return zfs_range_lock_impl(zp, off, len, type, B_FALSE);
    479 }
    480 
    481 rl_t *
    482 zfs_range_lock_try(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
    483 {
    484 	return zfs_range_lock_impl(zp, off, len, type, B_TRUE);
    485 }
    486 
    487 /*
    488  * Unlock a reader lock
    489  */
    490 static void
    491 zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
    492 {
    493 	avl_tree_t *tree = &zp->z_range_avl;
    494 	rl_t *rl, *next = NULL;
    495 	uint64_t len;
    496 
    497 	/*
    498 	 * The common case is when the remove entry is in the tree
    499 	 * (cnt == 1) meaning there's been no other reader locks overlapping
    500 	 * with this one. Otherwise the remove entry will have been
    501 	 * removed from the tree and replaced by proxies (one or
    502 	 * more ranges mapping to the entire range).
    503 	 */
    504 	if (remove->r_cnt == 1) {
    505 		avl_remove(tree, remove);
    506 		if (remove->r_write_wanted) {
    507 			cv_broadcast(&remove->r_wr_cv);
    508 			cv_destroy(&remove->r_wr_cv);
    509 		}
    510 		if (remove->r_read_wanted) {
    511 			cv_broadcast(&remove->r_rd_cv);
    512 			cv_destroy(&remove->r_rd_cv);
    513 		}
    514 	} else {
    515 		ASSERT0(remove->r_cnt);
    516 		ASSERT0(remove->r_write_wanted);
    517 		ASSERT0(remove->r_read_wanted);
    518 		/*
    519 		 * Find start proxy representing this reader lock,
    520 		 * then decrement ref count on all proxies
    521 		 * that make up this range, freeing them as needed.
    522 		 */
    523 		rl = avl_find(tree, remove, NULL);
    524 		ASSERT(rl);
    525 		ASSERT(rl->r_cnt);
    526 		ASSERT(rl->r_type == RL_READER);
    527 		for (len = remove->r_len; len != 0; rl = next) {
    528 			len -= rl->r_len;
    529 			if (len) {
    530 				next = AVL_NEXT(tree, rl);
    531 				ASSERT(next);
    532 				ASSERT(rl->r_off + rl->r_len == next->r_off);
    533 				ASSERT(next->r_cnt);
    534 				ASSERT(next->r_type == RL_READER);
    535 			}
    536 			rl->r_cnt--;
    537 			if (rl->r_cnt == 0) {
    538 				avl_remove(tree, rl);
    539 				if (rl->r_write_wanted) {
    540 					cv_broadcast(&rl->r_wr_cv);
    541 					cv_destroy(&rl->r_wr_cv);
    542 				}
    543 				if (rl->r_read_wanted) {
    544 					cv_broadcast(&rl->r_rd_cv);
    545 					cv_destroy(&rl->r_rd_cv);
    546 				}
    547 				kmem_free(rl, sizeof (rl_t));
    548 			}
    549 		}
    550 	}
    551 	kmem_free(remove, sizeof (rl_t));
    552 }
    553 
    554 /*
    555  * Unlock range and destroy range lock structure.
    556  */
    557 void
    558 zfs_range_unlock(rl_t *rl)
    559 {
    560 	znode_t *zp = rl->r_zp;
    561 
    562 	ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
    563 	ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
    564 	ASSERT(!rl->r_proxy);
    565 
    566 	mutex_enter(&zp->z_range_lock);
    567 	if (rl->r_type == RL_WRITER) {
    568 		/* writer locks can't be shared or split */
    569 		avl_remove(&zp->z_range_avl, rl);
    570 		mutex_exit(&zp->z_range_lock);
    571 		if (rl->r_write_wanted) {
    572 			cv_broadcast(&rl->r_wr_cv);
    573 			cv_destroy(&rl->r_wr_cv);
    574 		}
    575 		if (rl->r_read_wanted) {
    576 			cv_broadcast(&rl->r_rd_cv);
    577 			cv_destroy(&rl->r_rd_cv);
    578 		}
    579 		kmem_free(rl, sizeof (rl_t));
    580 	} else {
    581 		/*
    582 		 * lock may be shared, let zfs_range_unlock_reader()
    583 		 * release the lock and free the rl_t
    584 		 */
    585 		zfs_range_unlock_reader(zp, rl);
    586 		mutex_exit(&zp->z_range_lock);
    587 	}
    588 }
    589 
    590 /*
    591  * Reduce range locked as RL_WRITER from whole file to specified range.
    592  * Asserts the whole file is exclusivly locked and so there's only one
    593  * entry in the tree.
    594  */
    595 void
    596 zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
    597 {
    598 	znode_t *zp = rl->r_zp;
    599 
    600 	/* Ensure there are no other locks */
    601 	ASSERT(avl_numnodes(&zp->z_range_avl) == 1);
    602 	ASSERT(rl->r_off == 0);
    603 	ASSERT(rl->r_type == RL_WRITER);
    604 	ASSERT(!rl->r_proxy);
    605 	ASSERT3U(rl->r_len, ==, UINT64_MAX);
    606 	ASSERT3U(rl->r_cnt, ==, 1);
    607 
    608 	mutex_enter(&zp->z_range_lock);
    609 	rl->r_off = off;
    610 	rl->r_len = len;
    611 	mutex_exit(&zp->z_range_lock);
    612 	if (rl->r_write_wanted)
    613 		cv_broadcast(&rl->r_wr_cv);
    614 	if (rl->r_read_wanted)
    615 		cv_broadcast(&rl->r_rd_cv);
    616 }
    617 
    618 /*
    619  * AVL comparison function used to order range locks
    620  * Locks are ordered on the start offset of the range.
    621  */
    622 int
    623 zfs_range_compare(const void *arg1, const void *arg2)
    624 {
    625 	const rl_t *rl1 = arg1;
    626 	const rl_t *rl2 = arg2;
    627 
    628 	if (rl1->r_off > rl2->r_off)
    629 		return (1);
    630 	if (rl1->r_off < rl2->r_off)
    631 		return (-1);
    632 	return (0);
    633 }
    634