Home | History | Annotate | Line # | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * This file and its contents are supplied under the terms of the
      5  * Common Development and Distribution License ("CDDL"), version 1.0.
      6  * You may only use this file in accordance with the terms of version
      7  * 1.0 of the CDDL.
      8  *
      9  * A full copy of the text of the CDDL should have accompanied this
     10  * source.  A copy of the CDDL is also available via the Internet at
     11  * http://www.illumos.org/license/CDDL.
     12  *
     13  * CDDL HEADER END
     14  */
     15 /*
     16  * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
     17  */
     18 
     19 #include <sys/zfs_context.h>
     20 #include <sys/multilist.h>
     21 
     22 /* needed for spa_get_random() */
     23 #include <sys/spa.h>
     24 
     25 /*
     26  * Given the object contained on the list, return a pointer to the
     27  * object's multilist_node_t structure it contains.
     28  */
     29 static multilist_node_t *
     30 multilist_d2l(multilist_t *ml, void *obj)
     31 {
     32 	return ((multilist_node_t *)((char *)obj + ml->ml_offset));
     33 }
     34 
     35 /*
     36  * Initialize a new mutlilist using the parameters specified.
     37  *
     38  *  - 'size' denotes the size of the structure containing the
     39  *     multilist_node_t.
     40  *  - 'offset' denotes the byte offset of the mutlilist_node_t within
     41  *     the structure that contains it.
     42  *  - 'num' specifies the number of internal sublists to create.
     43  *  - 'index_func' is used to determine which sublist to insert into
     44  *     when the multilist_insert() function is called; as well as which
     45  *     sublist to remove from when multilist_remove() is called. The
     46  *     requirements this function must meet, are the following:
     47  *
     48  *      - It must always return the same value when called on the same
     49  *        object (to ensure the object is removed from the list it was
     50  *        inserted into).
     51  *
     52  *      - It must return a value in the range [0, number of sublists).
     53  *        The multilist_get_num_sublists() function may be used to
     54  *        determine the number of sublists in the multilist.
     55  *
     56  *     Also, in order to reduce internal contention between the sublists
     57  *     during insertion and removal, this function should choose evenly
     58  *     between all available sublists when inserting. This isn't a hard
     59  *     requirement, but a general rule of thumb in order to garner the
     60  *     best multi-threaded performance out of the data structure.
     61  */
     62 void
     63 multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
     64     multilist_sublist_index_func_t *index_func)
     65 {
     66 	ASSERT3P(ml, !=, NULL);
     67 	ASSERT3U(size, >, 0);
     68 	ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
     69 	ASSERT3U(num, >, 0);
     70 	ASSERT3P(index_func, !=, NULL);
     71 
     72 	ml->ml_offset = offset;
     73 	ml->ml_num_sublists = num;
     74 	ml->ml_index_func = index_func;
     75 
     76 	ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
     77 	    ml->ml_num_sublists, KM_SLEEP);
     78 
     79 	ASSERT3P(ml->ml_sublists, !=, NULL);
     80 
     81 	for (int i = 0; i < ml->ml_num_sublists; i++) {
     82 		multilist_sublist_t *mls = &ml->ml_sublists[i];
     83 		mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
     84 		list_create(&mls->mls_list, size, offset);
     85 	}
     86 }
     87 
     88 /*
     89  * Destroy the given multilist object, and free up any memory it holds.
     90  */
     91 void
     92 multilist_destroy(multilist_t *ml)
     93 {
     94 	ASSERT(multilist_is_empty(ml));
     95 
     96 	for (int i = 0; i < ml->ml_num_sublists; i++) {
     97 		multilist_sublist_t *mls = &ml->ml_sublists[i];
     98 
     99 		ASSERT(list_is_empty(&mls->mls_list));
    100 
    101 		list_destroy(&mls->mls_list);
    102 		mutex_destroy(&mls->mls_lock);
    103 	}
    104 
    105 	ASSERT3P(ml->ml_sublists, !=, NULL);
    106 	kmem_free(ml->ml_sublists,
    107 	    sizeof (multilist_sublist_t) * ml->ml_num_sublists);
    108 
    109 	ml->ml_num_sublists = 0;
    110 	ml->ml_offset = 0;
    111 }
    112 
    113 /*
    114  * Insert the given object into the multilist.
    115  *
    116  * This function will insert the object specified into the sublist
    117  * determined using the function given at multilist creation time.
    118  *
    119  * The sublist locks are automatically acquired if not already held, to
    120  * ensure consistency when inserting and removing from multiple threads.
    121  */
    122 void
    123 multilist_insert(multilist_t *ml, void *obj)
    124 {
    125 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
    126 	multilist_sublist_t *mls;
    127 	boolean_t need_lock;
    128 
    129 	DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
    130 	    unsigned int, sublist_idx, void *, obj);
    131 
    132 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
    133 
    134 	mls = &ml->ml_sublists[sublist_idx];
    135 
    136 	/*
    137 	 * Note: Callers may already hold the sublist lock by calling
    138 	 * multilist_sublist_lock().  Here we rely on MUTEX_HELD()
    139 	 * returning TRUE if and only if the current thread holds the
    140 	 * lock.  While it's a little ugly to make the lock recursive in
    141 	 * this way, it works and allows the calling code to be much
    142 	 * simpler -- otherwise it would have to pass around a flag
    143 	 * indicating that it already has the lock.
    144 	 */
    145 	need_lock = !MUTEX_HELD(&mls->mls_lock);
    146 
    147 	if (need_lock)
    148 		mutex_enter(&mls->mls_lock);
    149 
    150 	ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
    151 
    152 	multilist_sublist_insert_head(mls, obj);
    153 
    154 	if (need_lock)
    155 		mutex_exit(&mls->mls_lock);
    156 }
    157 
    158 /*
    159  * Remove the given object from the multilist.
    160  *
    161  * This function will remove the object specified from the sublist
    162  * determined using the function given at multilist creation time.
    163  *
    164  * The necessary sublist locks are automatically acquired, to ensure
    165  * consistency when inserting and removing from multiple threads.
    166  */
    167 void
    168 multilist_remove(multilist_t *ml, void *obj)
    169 {
    170 	unsigned int sublist_idx = ml->ml_index_func(ml, obj);
    171 	multilist_sublist_t *mls;
    172 	boolean_t need_lock;
    173 
    174 	DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
    175 	    unsigned int, sublist_idx, void *, obj);
    176 
    177 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
    178 
    179 	mls = &ml->ml_sublists[sublist_idx];
    180 	/* See comment in multilist_insert(). */
    181 	need_lock = !MUTEX_HELD(&mls->mls_lock);
    182 
    183 	if (need_lock)
    184 		mutex_enter(&mls->mls_lock);
    185 
    186 	ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
    187 
    188 	multilist_sublist_remove(mls, obj);
    189 
    190 	if (need_lock)
    191 		mutex_exit(&mls->mls_lock);
    192 }
    193 
    194 /*
    195  * Check to see if this multilist object is empty.
    196  *
    197  * This will return TRUE if it finds all of the sublists of this
    198  * multilist to be empty, and FALSE otherwise. Each sublist lock will be
    199  * automatically acquired as necessary.
    200  *
    201  * If concurrent insertions and removals are occurring, the semantics
    202  * of this function become a little fuzzy. Instead of locking all
    203  * sublists for the entire call time of the function, each sublist is
    204  * only locked as it is individually checked for emptiness. Thus, it's
    205  * possible for this function to return TRUE with non-empty sublists at
    206  * the time the function returns. This would be due to another thread
    207  * inserting into a given sublist, after that specific sublist was check
    208  * and deemed empty, but before all sublists have been checked.
    209  */
    210 int
    211 multilist_is_empty(multilist_t *ml)
    212 {
    213 	for (int i = 0; i < ml->ml_num_sublists; i++) {
    214 		multilist_sublist_t *mls = &ml->ml_sublists[i];
    215 		/* See comment in multilist_insert(). */
    216 		boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
    217 
    218 		if (need_lock)
    219 			mutex_enter(&mls->mls_lock);
    220 
    221 		if (!list_is_empty(&mls->mls_list)) {
    222 			if (need_lock)
    223 				mutex_exit(&mls->mls_lock);
    224 
    225 			return (FALSE);
    226 		}
    227 
    228 		if (need_lock)
    229 			mutex_exit(&mls->mls_lock);
    230 	}
    231 
    232 	return (TRUE);
    233 }
    234 
    235 /* Return the number of sublists composing this multilist */
    236 unsigned int
    237 multilist_get_num_sublists(multilist_t *ml)
    238 {
    239 	return (ml->ml_num_sublists);
    240 }
    241 
    242 /* Return a randomly selected, valid sublist index for this multilist */
    243 unsigned int
    244 multilist_get_random_index(multilist_t *ml)
    245 {
    246 	return (spa_get_random(ml->ml_num_sublists));
    247 }
    248 
    249 /* Lock and return the sublist specified at the given index */
    250 multilist_sublist_t *
    251 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
    252 {
    253 	multilist_sublist_t *mls;
    254 
    255 	ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
    256 	mls = &ml->ml_sublists[sublist_idx];
    257 	mutex_enter(&mls->mls_lock);
    258 
    259 	return (mls);
    260 }
    261 
    262 void
    263 multilist_sublist_unlock(multilist_sublist_t *mls)
    264 {
    265 	mutex_exit(&mls->mls_lock);
    266 }
    267 
    268 /*
    269  * We're allowing any object to be inserted into this specific sublist,
    270  * but this can lead to trouble if multilist_remove() is called to
    271  * remove this object. Specifically, if calling ml_index_func on this
    272  * object returns an index for sublist different than what is passed as
    273  * a parameter here, any call to multilist_remove() with this newly
    274  * inserted object is undefined! (the call to multilist_remove() will
    275  * remove the object from a list that it isn't contained in)
    276  */
    277 void
    278 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
    279 {
    280 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    281 	list_insert_head(&mls->mls_list, obj);
    282 }
    283 
    284 /* please see comment above multilist_sublist_insert_head */
    285 void
    286 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
    287 {
    288 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    289 	list_insert_tail(&mls->mls_list, obj);
    290 }
    291 
    292 /*
    293  * Move the object one element forward in the list.
    294  *
    295  * This function will move the given object forward in the list (towards
    296  * the head) by one object. So, in essence, it will swap its position in
    297  * the list with its "prev" pointer. If the given object is already at the
    298  * head of the list, it cannot be moved forward any more than it already
    299  * is, so no action is taken.
    300  *
    301  * NOTE: This function **must not** remove any object from the list other
    302  *       than the object given as the parameter. This is relied upon in
    303  *       arc_evict_state_impl().
    304  */
    305 void
    306 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
    307 {
    308 	void *prev = list_prev(&mls->mls_list, obj);
    309 
    310 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    311 	ASSERT(!list_is_empty(&mls->mls_list));
    312 
    313 	/* 'obj' must be at the head of the list, nothing to do */
    314 	if (prev == NULL)
    315 		return;
    316 
    317 	list_remove(&mls->mls_list, obj);
    318 	list_insert_before(&mls->mls_list, prev, obj);
    319 }
    320 
    321 void
    322 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
    323 {
    324 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    325 	list_remove(&mls->mls_list, obj);
    326 }
    327 
    328 void *
    329 multilist_sublist_head(multilist_sublist_t *mls)
    330 {
    331 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    332 	return (list_head(&mls->mls_list));
    333 }
    334 
    335 void *
    336 multilist_sublist_tail(multilist_sublist_t *mls)
    337 {
    338 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    339 	return (list_tail(&mls->mls_list));
    340 }
    341 
    342 void *
    343 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
    344 {
    345 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    346 	return (list_next(&mls->mls_list, obj));
    347 }
    348 
    349 void *
    350 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
    351 {
    352 	ASSERT(MUTEX_HELD(&mls->mls_lock));
    353 	return (list_prev(&mls->mls_list, obj));
    354 }
    355 
    356 void
    357 multilist_link_init(multilist_node_t *link)
    358 {
    359 	list_link_init(link);
    360 }
    361 
    362 int
    363 multilist_link_active(multilist_node_t *link)
    364 {
    365 	return (list_link_active(link));
    366 }
    367