Home | History | Annotate | Line # | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel (at) dawidek.net>.
     23  * All rights reserved.
     24  */
     25 
     26 #include <sys/zfs_context.h>
     27 #include <sys/spa_impl.h>
     28 #include <sys/vdev_impl.h>
     29 #include <sys/trim_map.h>
     30 #include <sys/time.h>
     31 
/*
 * Calculate the zio end, rounding the size up to a multiple of the
 * top-level vdev's block size (1 << ashift), as would be done by
 * zio_vdev_io_start.
 *
 * This makes free range consolidation much more effective
 * than it would otherwise be as well as ensuring that entire
 * blocks are invalidated by writes.
 *
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. "&v" or "a ? b : c") expand with the intended precedence.
 */
#define	TRIM_ZIO_END(vd, offset, size)	((offset) +		\
	P2ROUNDUP((size), 1ULL << (vd)->vdev_top->vdev_ashift))
     42 
/*
 * Largest byte range accounted as a single TRIM segment (512 << 16 bytes);
 * described as the maximal segment size for ATA TRIM.
 */
#define	TRIM_MAP_SIZE_FACTOR	(512 << 16)

/*
 * Number of TRIM segments accounted for a range of "size" bytes: one, plus
 * one more for every full TRIM_MAP_SIZE_FACTOR bytes the range contains.
 */
#define	TRIM_MAP_SEGS(size)	((size) / TRIM_MAP_SIZE_FACTOR + 1)
     47 
/*
 * Append segment ts to the tail of tm's segment list (tm_head) and add the
 * number of TRIM segments it represents to tm_pending.
 *
 * The segment count is derived from ts_end - ts_start at the time of the
 * call, so the bounds must be final before the segment is added.
 * Both arguments are evaluated more than once.
 */
#define TRIM_MAP_ADD(tm, ts)	do {				\
	list_insert_tail(&(tm)->tm_head, (ts));			\
	(tm)->tm_pending += TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \
} while (0)
     52 
/*
 * Unlink segment ts from tm's segment list (tm_head) and subtract the
 * number of TRIM segments it represents from tm_pending.
 *
 * The segment count is derived from ts_end - ts_start at the time of the
 * call, so this must run before the segment's bounds are modified.
 * Both arguments are evaluated more than once.
 */
#define TRIM_MAP_REM(tm, ts)	do {				\
	list_remove(&(tm)->tm_head, (ts));			\
	(tm)->tm_pending -= TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \
} while (0)
     57 
/*
 * Per-vdev TRIM bookkeeping, stored in vd->vdev_trimmap.
 *
 * Queued segments are linked both on tm_head (in insertion order) and in
 * tm_queued_frees (indexed by offset range).  The helpers in this file
 * assert that tm_lock is held while these structures are manipulated.
 */
typedef struct trim_map {
	list_t		tm_head;		/* List of segments sorted by txg. */
	avl_tree_t	tm_queued_frees;	/* AVL tree of segments waiting for TRIM. */
	avl_tree_t	tm_inflight_frees;	/* AVL tree of in-flight TRIMs. */
	avl_tree_t	tm_inflight_writes;	/* AVL tree of in-flight writes. */
	list_t		tm_pending_writes;	/* Writes blocked on in-flight frees. */
	kmutex_t	tm_lock;		/* Protects the fields of this structure. */
	uint64_t	tm_pending;		/* Sum of TRIM_MAP_SEGS() over tm_head. */
} trim_map_t;
     67 
/*
 * A contiguous byte range [ts_start, ts_end) on a vdev that has been freed
 * and is queued for, or undergoing, TRIM.  A segment is linked into one AVL
 * tree via ts_node and, while queued, into trim_map_t's tm_head via ts_next.
 */
typedef struct trim_seg {
	avl_node_t	ts_node;	/* AVL node. */
	list_node_t	ts_next;	/* List element. */
	uint64_t	ts_start;	/* Starting offset of this segment. */
	uint64_t	ts_end;		/* Ending offset (non-inclusive). */
	uint64_t	ts_txg;		/* Segment creation txg. */
	hrtime_t	ts_time;	/* Segment creation time. */
} trim_seg_t;
     76 
/* Global TRIM switch, defined outside this file. */
extern boolean_t zfs_trim_enabled;

/*
 * Tunables.  All of them are exported read-write through sysctl below, so
 * their values may change at any time while the TRIM thread is running.
 * trim_timeout also scales the per-pass TRIM budget computed in
 * trim_map_vdev_commit().
 */
static u_int trim_txg_delay = 32;	/* Keep deleted data up to 32 TXG */
static u_int trim_timeout = 30;		/* Keep deleted data up to 30s */
static u_int trim_max_interval = 1;	/* 1s delays between TRIMs */
static u_int trim_vdev_max_pending = 10000; /* Keep up to 10K segments */

/* vfs.zfs.trim.* sysctl subtree. */
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM");

SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, txg_delay, CTLFLAG_RWTUN, &trim_txg_delay,
    0, "Delay TRIMs by up to this many TXGs");
SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, timeout, CTLFLAG_RWTUN, &trim_timeout, 0,
    "Delay TRIMs by up to this many seconds");
SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN,
    &trim_max_interval, 0,
    "Maximum interval between TRIM queue processing (seconds)");

/* vfs.zfs.vdev.trim_max_pending */
SYSCTL_DECL(_vfs_zfs_vdev);
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN,
    &trim_vdev_max_pending, 0,
    "Maximum pending TRIM segments for a vdev");

/* Needed by trim_map_destroy(), which appears before the definition. */
static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
    101 
    102 static int
    103 trim_map_seg_compare(const void *x1, const void *x2)
    104 {
    105 	const trim_seg_t *s1 = x1;
    106 	const trim_seg_t *s2 = x2;
    107 
    108 	if (s1->ts_start < s2->ts_start) {
    109 		if (s1->ts_end > s2->ts_start)
    110 			return (0);
    111 		return (-1);
    112 	}
    113 	if (s1->ts_start > s2->ts_start) {
    114 		if (s1->ts_start < s2->ts_end)
    115 			return (0);
    116 		return (1);
    117 	}
    118 	return (0);
    119 }
    120 
    121 static int
    122 trim_map_zio_compare(const void *x1, const void *x2)
    123 {
    124 	const zio_t *z1 = x1;
    125 	const zio_t *z2 = x2;
    126 
    127 	if (z1->io_offset < z2->io_offset) {
    128 		if (z1->io_offset + z1->io_size > z2->io_offset)
    129 			return (0);
    130 		return (-1);
    131 	}
    132 	if (z1->io_offset > z2->io_offset) {
    133 		if (z1->io_offset < z2->io_offset + z2->io_size)
    134 			return (0);
    135 		return (1);
    136 	}
    137 	return (0);
    138 }
    139 
    140 void
    141 trim_map_create(vdev_t *vd)
    142 {
    143 	trim_map_t *tm;
    144 
    145 	ASSERT(zfs_trim_enabled && !vd->vdev_notrim &&
    146 		vd->vdev_ops->vdev_op_leaf);
    147 
    148 	tm = kmem_zalloc(sizeof (*tm), KM_SLEEP);
    149 	mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL);
    150 	list_create(&tm->tm_head, sizeof (trim_seg_t),
    151 	    offsetof(trim_seg_t, ts_next));
    152 	list_create(&tm->tm_pending_writes, sizeof (zio_t),
    153 	    offsetof(zio_t, io_trim_link));
    154 	avl_create(&tm->tm_queued_frees, trim_map_seg_compare,
    155 	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
    156 	avl_create(&tm->tm_inflight_frees, trim_map_seg_compare,
    157 	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
    158 	avl_create(&tm->tm_inflight_writes, trim_map_zio_compare,
    159 	    sizeof (zio_t), offsetof(zio_t, io_trim_node));
    160 	vd->vdev_trimmap = tm;
    161 }
    162 
    163 void
    164 trim_map_destroy(vdev_t *vd)
    165 {
    166 	trim_map_t *tm;
    167 	trim_seg_t *ts;
    168 
    169 	ASSERT(vd->vdev_ops->vdev_op_leaf);
    170 
    171 	if (!zfs_trim_enabled)
    172 		return;
    173 
    174 	tm = vd->vdev_trimmap;
    175 	if (tm == NULL)
    176 		return;
    177 
    178 	/*
    179 	 * We may have been called before trim_map_vdev_commit_done()
    180 	 * had a chance to run, so do it now to prune the remaining
    181 	 * inflight frees.
    182 	 */
    183 	trim_map_vdev_commit_done(vd->vdev_spa, vd);
    184 
    185 	mutex_enter(&tm->tm_lock);
    186 	while ((ts = list_head(&tm->tm_head)) != NULL) {
    187 		avl_remove(&tm->tm_queued_frees, ts);
    188 		TRIM_MAP_REM(tm, ts);
    189 		kmem_free(ts, sizeof (*ts));
    190 	}
    191 	mutex_exit(&tm->tm_lock);
    192 
    193 	avl_destroy(&tm->tm_queued_frees);
    194 	avl_destroy(&tm->tm_inflight_frees);
    195 	avl_destroy(&tm->tm_inflight_writes);
    196 	list_destroy(&tm->tm_pending_writes);
    197 	list_destroy(&tm->tm_head);
    198 	mutex_destroy(&tm->tm_lock);
    199 	kmem_free(tm, sizeof (*tm));
    200 	vd->vdev_trimmap = NULL;
    201 }
    202 
/*
 * Queue the range [start, end) for TRIM in tm->tm_queued_frees, stamping
 * it with txg and the current time.
 *
 * Any part of the range that overlaps an already-queued segment is left
 * to that segment; only the uncovered left/right remainders are added
 * (recursively).  A range that directly abuts an existing segment is
 * merged into it; the merged segment takes the new txg/time and is moved
 * to the tail of tm_head.
 *
 * The caller must hold tm->tm_lock.
 */
static void
trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
{
	avl_index_t where;
	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
	boolean_t merge_before, merge_after;
	hrtime_t time;

	ASSERT(MUTEX_HELD(&tm->tm_lock));
	VERIFY(start < end);

	time = gethrtime();
	/* Only the range fields of the search key are read by the comparator. */
	tsearch.ts_start = start;
	tsearch.ts_end = end;

	ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
	if (ts != NULL) {
		/*
		 * An overlapping segment already exists: add only the parts
		 * of the new range that stick out on either side of it.
		 */
		if (start < ts->ts_start)
			trim_map_segment_add(tm, start, ts->ts_start, txg);
		if (end > ts->ts_end)
			trim_map_segment_add(tm, ts->ts_end, end, txg);
		return;
	}

	/* No overlap; look at the neighbours for possible merging. */
	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);

	merge_before = (ts_before != NULL && ts_before->ts_end == start);
	merge_after = (ts_after != NULL && ts_after->ts_start == end);

	/*
	 * In every merge case TRIM_MAP_REM runs before the bounds change and
	 * TRIM_MAP_ADD after, so tm_pending is adjusted by the old and new
	 * sizes respectively.
	 */
	if (merge_before && merge_after) {
		/* The new range bridges both neighbours: fold into ts_after. */
		avl_remove(&tm->tm_queued_frees, ts_before);
		TRIM_MAP_REM(tm, ts_before);
		TRIM_MAP_REM(tm, ts_after);
		ts_after->ts_start = ts_before->ts_start;
		ts_after->ts_txg = txg;
		ts_after->ts_time = time;
		TRIM_MAP_ADD(tm, ts_after);
		kmem_free(ts_before, sizeof (*ts_before));
	} else if (merge_before) {
		/* Extend the preceding segment to the right. */
		TRIM_MAP_REM(tm, ts_before);
		ts_before->ts_end = end;
		ts_before->ts_txg = txg;
		ts_before->ts_time = time;
		TRIM_MAP_ADD(tm, ts_before);
	} else if (merge_after) {
		/* Extend the following segment to the left. */
		TRIM_MAP_REM(tm, ts_after);
		ts_after->ts_start = start;
		ts_after->ts_txg = txg;
		ts_after->ts_time = time;
		TRIM_MAP_ADD(tm, ts_after);
	} else {
		/* Isolated range: insert a brand new segment. */
		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
		ts->ts_start = start;
		ts->ts_end = end;
		ts->ts_txg = txg;
		ts->ts_time = time;
		avl_insert(&tm->tm_queued_frees, ts, where);
		TRIM_MAP_ADD(tm, ts);
	}
}
    264 
/*
 * Remove the range [start, end) from the queued segment ts.
 *
 * Depending on how the range overlaps ts, the segment is split in two,
 * shortened on one side, or removed from the tree and freed.  Surviving
 * pieces keep the original txg/time and are re-added at the tail of
 * tm_head, with tm_pending updated through TRIM_MAP_REM/TRIM_MAP_ADD.
 *
 * The caller must hold tm->tm_lock.
 */
static void
trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
    uint64_t end)
{
	trim_seg_t *nts;
	boolean_t left_over, right_over;

	ASSERT(MUTEX_HELD(&tm->tm_lock));

	/* Does ts extend beyond the removed range on the left / right? */
	left_over = (ts->ts_start < start);
	right_over = (ts->ts_end > end);

	/* Account for the old size before the bounds are changed. */
	TRIM_MAP_REM(tm, ts);
	if (left_over && right_over) {
		/* Hole in the middle: ts keeps the left part, nts the right. */
		nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
		nts->ts_start = end;
		nts->ts_end = ts->ts_end;
		nts->ts_txg = ts->ts_txg;
		nts->ts_time = ts->ts_time;
		ts->ts_end = start;
		avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
		TRIM_MAP_ADD(tm, ts);
		TRIM_MAP_ADD(tm, nts);
	} else if (left_over) {
		/* Only the left part survives. */
		ts->ts_end = start;
		TRIM_MAP_ADD(tm, ts);
	} else if (right_over) {
		/* Only the right part survives. */
		ts->ts_start = end;
		TRIM_MAP_ADD(tm, ts);
	} else {
		/* The segment is fully covered: drop it. */
		avl_remove(&tm->tm_queued_frees, ts);
		kmem_free(ts, sizeof (*ts));
	}
}
    299 
    300 static void
    301 trim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
    302 {
    303 	zio_t zsearch, *zs;
    304 
    305 	ASSERT(MUTEX_HELD(&tm->tm_lock));
    306 
    307 	zsearch.io_offset = start;
    308 	zsearch.io_size = end - start;
    309 
    310 	zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL);
    311 	if (zs == NULL) {
    312 		trim_map_segment_add(tm, start, end, txg);
    313 		return;
    314 	}
    315 	if (start < zs->io_offset)
    316 		trim_map_free_locked(tm, start, zs->io_offset, txg);
    317 	if (zs->io_offset + zs->io_size < end)
    318 		trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg);
    319 }
    320 
    321 void
    322 trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg)
    323 {
    324 	trim_map_t *tm = vd->vdev_trimmap;
    325 
    326 	if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
    327 		return;
    328 
    329 	mutex_enter(&tm->tm_lock);
    330 	trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg);
    331 	mutex_exit(&tm->tm_lock);
    332 }
    333 
    334 boolean_t
    335 trim_map_write_start(zio_t *zio)
    336 {
    337 	vdev_t *vd = zio->io_vd;
    338 	trim_map_t *tm = vd->vdev_trimmap;
    339 	trim_seg_t tsearch, *ts;
    340 	boolean_t left_over, right_over;
    341 	uint64_t start, end;
    342 
    343 	if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
    344 		return (B_TRUE);
    345 
    346 	start = zio->io_offset;
    347 	end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size);
    348 	tsearch.ts_start = start;
    349 	tsearch.ts_end = end;
    350 
    351 	mutex_enter(&tm->tm_lock);
    352 
    353 	/*
    354 	 * Checking for colliding in-flight frees.
    355 	 */
    356 	ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL);
    357 	if (ts != NULL) {
    358 		list_insert_tail(&tm->tm_pending_writes, zio);
    359 		mutex_exit(&tm->tm_lock);
    360 		return (B_FALSE);
    361 	}
    362 
    363 	ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
    364 	if (ts != NULL) {
    365 		/*
    366 		 * Loop until all overlapping segments are removed.
    367 		 */
    368 		do {
    369 			trim_map_segment_remove(tm, ts, start, end);
    370 			ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
    371 		} while (ts != NULL);
    372 	}
    373 	avl_add(&tm->tm_inflight_writes, zio);
    374 
    375 	mutex_exit(&tm->tm_lock);
    376 
    377 	return (B_TRUE);
    378 }
    379 
    380 void
    381 trim_map_write_done(zio_t *zio)
    382 {
    383 	vdev_t *vd = zio->io_vd;
    384 	trim_map_t *tm = vd->vdev_trimmap;
    385 
    386 	/*
    387 	 * Don't check for vdev_notrim, since the write could have
    388 	 * started before vdev_notrim was set.
    389 	 */
    390 	if (!zfs_trim_enabled || tm == NULL)
    391 		return;
    392 
    393 	mutex_enter(&tm->tm_lock);
    394 	/*
    395 	 * Don't fail if the write isn't in the tree, since the write
    396 	 * could have started after vdev_notrim was set.
    397 	 */
    398 	if (zio->io_trim_node.avl_child[0] ||
    399 	    zio->io_trim_node.avl_child[1] ||
    400 	    AVL_XPARENT(&zio->io_trim_node) ||
    401 	    tm->tm_inflight_writes.avl_root == &zio->io_trim_node)
    402 		avl_remove(&tm->tm_inflight_writes, zio);
    403 	mutex_exit(&tm->tm_lock);
    404 }
    405 
    406 /*
    407  * Return the oldest segment (the one with the lowest txg / time) or NULL if:
    408  * 1. The list is empty
    409  * 2. The first element's txg is greater than txgsafe
    410  * 3. The first element's txg is not greater than the txg argument and the
    411  *    the first element's time is not greater than time argument
    412  */
    413 static trim_seg_t *
    414 trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time,
    415     boolean_t force)
    416 {
    417 	trim_seg_t *ts;
    418 
    419 	ASSERT(MUTEX_HELD(&tm->tm_lock));
    420 	VERIFY(txgsafe >= txg);
    421 
    422 	ts = list_head(&tm->tm_head);
    423 	if (ts != NULL && ts->ts_txg <= txgsafe &&
    424 	    (ts->ts_txg <= txg || ts->ts_time <= time || force))
    425 		return (ts);
    426 	return (NULL);
    427 }
    428 
    429 static void
    430 trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
    431 {
    432 	trim_map_t *tm = vd->vdev_trimmap;
    433 	trim_seg_t *ts;
    434 	uint64_t size, offset, txgtarget, txgsafe;
    435 	int64_t hard, soft;
    436 	hrtime_t timelimit;
    437 
    438 	ASSERT(vd->vdev_ops->vdev_op_leaf);
    439 
    440 	if (tm == NULL)
    441 		return;
    442 
    443 	timelimit = gethrtime() - (hrtime_t)trim_timeout * NANOSEC;
    444 	if (vd->vdev_isl2cache) {
    445 		txgsafe = UINT64_MAX;
    446 		txgtarget = UINT64_MAX;
    447 	} else {
    448 		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
    449 		if (txgsafe > trim_txg_delay)
    450 			txgtarget = txgsafe - trim_txg_delay;
    451 		else
    452 			txgtarget = 0;
    453 	}
    454 
    455 	mutex_enter(&tm->tm_lock);
    456 	hard = 0;
    457 	if (tm->tm_pending > trim_vdev_max_pending)
    458 		hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
    459 	soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
    460 	/* Loop until we have sent all outstanding free's */
    461 	while (soft > 0 &&
    462 	    (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit, hard > 0))
    463 	    != NULL) {
    464 		TRIM_MAP_REM(tm, ts);
    465 		avl_remove(&tm->tm_queued_frees, ts);
    466 		avl_add(&tm->tm_inflight_frees, ts);
    467 		size = ts->ts_end - ts->ts_start;
    468 		offset = ts->ts_start;
    469 		/*
    470 		 * We drop the lock while we call zio_nowait as the IO
    471 		 * scheduler can result in a different IO being run e.g.
    472 		 * a write which would result in a recursive lock.
    473 		 */
    474 		mutex_exit(&tm->tm_lock);
    475 
    476 		zio_nowait(zio_trim(zio, spa, vd, offset, size));
    477 
    478 		soft -= TRIM_MAP_SEGS(size);
    479 		hard -= TRIM_MAP_SEGS(size);
    480 		mutex_enter(&tm->tm_lock);
    481 	}
    482 	mutex_exit(&tm->tm_lock);
    483 }
    484 
    485 static void
    486 trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd)
    487 {
    488 	trim_map_t *tm = vd->vdev_trimmap;
    489 	trim_seg_t *ts;
    490 	list_t pending_writes;
    491 	zio_t *zio;
    492 	uint64_t start, size;
    493 	void *cookie;
    494 
    495 	ASSERT(vd->vdev_ops->vdev_op_leaf);
    496 
    497 	if (tm == NULL)
    498 		return;
    499 
    500 	mutex_enter(&tm->tm_lock);
    501 	if (!avl_is_empty(&tm->tm_inflight_frees)) {
    502 		cookie = NULL;
    503 		while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees,
    504 		    &cookie)) != NULL) {
    505 			kmem_free(ts, sizeof (*ts));
    506 		}
    507 	}
    508 	list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t,
    509 	    io_trim_link));
    510 	list_move_tail(&pending_writes, &tm->tm_pending_writes);
    511 	mutex_exit(&tm->tm_lock);
    512 
    513 	while ((zio = list_remove_head(&pending_writes)) != NULL) {
    514 		zio_vdev_io_reissue(zio);
    515 		zio_execute(zio);
    516 	}
    517 	list_destroy(&pending_writes);
    518 }
    519 
    520 static void
    521 trim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
    522 {
    523 	int c;
    524 
    525 	if (vd == NULL)
    526 		return;
    527 
    528 	if (vd->vdev_ops->vdev_op_leaf) {
    529 		trim_map_vdev_commit(spa, zio, vd);
    530 	} else {
    531 		for (c = 0; c < vd->vdev_children; c++)
    532 			trim_map_commit(spa, zio, vd->vdev_child[c]);
    533 	}
    534 }
    535 
    536 static void
    537 trim_map_commit_done(spa_t *spa, vdev_t *vd)
    538 {
    539 	int c;
    540 
    541 	if (vd == NULL)
    542 		return;
    543 
    544 	if (vd->vdev_ops->vdev_op_leaf) {
    545 		trim_map_vdev_commit_done(spa, vd);
    546 	} else {
    547 		for (c = 0; c < vd->vdev_children; c++)
    548 			trim_map_commit_done(spa, vd->vdev_child[c]);
    549 	}
    550 }
    551 
    552 static void
    553 trim_thread(void *arg)
    554 {
    555 	spa_t *spa = arg;
    556 	zio_t *zio;
    557 
    558 #ifdef __FreeBSD__
    559 #ifdef _KERNEL
    560 	(void) snprintf(curthread->td_name, sizeof(curthread->td_name),
    561 	    "trim %s", spa_name(spa));
    562 #endif
    563 #endif
    564 #ifdef __NetBSD__
    565 #ifdef _KERNEL
    566 	size_t sz;
    567 	char *name, *oname;
    568 	struct lwp *l = curlwp;
    569 
    570 	name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
    571 	snprintf(name, MAXCOMLEN, "trim %s", spa_name(spa));
    572 	name[MAXCOMLEN - 1] = 0;
    573 
    574 	lwp_lock(l);
    575 	oname = l->l_name;
    576 	l->l_name = name;
    577 	lwp_unlock(l);
    578 
    579 	if (oname != NULL)
    580 		kmem_free(oname, MAXCOMLEN);
    581 #endif
    582 #endif
    583 
    584 	for (;;) {
    585 		mutex_enter(&spa->spa_trim_lock);
    586 		if (spa->spa_trim_thread == NULL) {
    587 			spa->spa_trim_thread = curthread;
    588 			cv_signal(&spa->spa_trim_cv);
    589 			mutex_exit(&spa->spa_trim_lock);
    590 			thread_exit();
    591 		}
    592 
    593 		(void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock,
    594 		    hz * trim_max_interval);
    595 		mutex_exit(&spa->spa_trim_lock);
    596 
    597 		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
    598 
    599 		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
    600 		trim_map_commit(spa, zio, spa->spa_root_vdev);
    601 		(void) zio_wait(zio);
    602 		trim_map_commit_done(spa, spa->spa_root_vdev);
    603 		spa_config_exit(spa, SCL_STATE, FTAG);
    604 	}
    605 }
    606 
    607 void
    608 trim_thread_create(spa_t *spa)
    609 {
    610 
    611 	if (!zfs_trim_enabled)
    612 		return;
    613 
    614 	mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL);
    615 	cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL);
    616 	mutex_enter(&spa->spa_trim_lock);
    617 	spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0,
    618 	    TS_RUN, minclsyspri);
    619 	mutex_exit(&spa->spa_trim_lock);
    620 }
    621 
    622 void
    623 trim_thread_destroy(spa_t *spa)
    624 {
    625 
    626 	if (!zfs_trim_enabled)
    627 		return;
    628 	if (spa->spa_trim_thread == NULL)
    629 		return;
    630 
    631 	mutex_enter(&spa->spa_trim_lock);
    632 	/* Setting spa_trim_thread to NULL tells the thread to stop. */
    633 	spa->spa_trim_thread = NULL;
    634 	cv_signal(&spa->spa_trim_cv);
    635 	/* The thread will set it back to != NULL on exit. */
    636 	while (spa->spa_trim_thread == NULL)
    637 		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
    638 	spa->spa_trim_thread = NULL;
    639 	mutex_exit(&spa->spa_trim_lock);
    640 
    641 	cv_destroy(&spa->spa_trim_cv);
    642 	mutex_destroy(&spa->spa_trim_lock);
    643 }
    644 
    645 void
    646 trim_thread_wakeup(spa_t *spa)
    647 {
    648 
    649 	if (!zfs_trim_enabled)
    650 		return;
    651 	if (spa->spa_trim_thread == NULL)
    652 		return;
    653 
    654 	mutex_enter(&spa->spa_trim_lock);
    655 	cv_signal(&spa->spa_trim_cv);
    656 	mutex_exit(&spa->spa_trim_lock);
    657 }
    658