/*	$NetBSD: bufq_priocscan.c,v 1.21 2017/05/04 11:03:27 kamil Exp $	*/

/*-
 * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.21 2017/05/04 11:03:27 kamil Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/bufq_impl.h>
#include <sys/kmem.h>
#include <sys/rbtree.h>
#include <sys/module.h>

#undef PRIOCSCAN_USE_GLOBAL_POSITION

/*
 * Cyclical scan (CSCAN)
 */

struct cscan_key {
	daddr_t	k_rawblkno;
	int	k_cylinder;
};

struct cscan_queue {
	rb_tree_t cq_buffers;		/* ordered list of buffers */
#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	struct cscan_key cq_lastkey;	/* key of last request */
#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	int cq_sortby;			/* BUFQ_SORT_MASK */
	rb_tree_ops_t cq_ops;
};

static signed int
buf_cmp(const struct buf *b1, const struct buf *b2, int sortby)
{

	if (buf_inorder(b2, b1, sortby)) {
		return 1;	/* b1 > b2 */
	}
	if (buf_inorder(b1, b2, sortby)) {
		return -1;	/* b1 < b2 */
	}
	return 0;
}
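/*
 * a worked example of the comparator above (a sketch with made-up
 * values, not part of the kernel proper): with BUFQ_SORT_RAWBLOCK,
 * buf_inorder() orders buffers by b_rawblkno, so for two buffers b50
 * and b100 at raw block numbers 50 and 100:
 *
 *	buf_cmp(b100, b50, BUFQ_SORT_RAWBLOCK) > 0
 *	buf_cmp(b50, b100, BUFQ_SORT_RAWBLOCK) < 0
 *	buf_cmp(b50, b50, BUFQ_SORT_RAWBLOCK) == 0
 *
 * with BUFQ_SORT_CYLINDER, b_cylinder is compared before b_rawblkno.
 * this three-way result is what the rbtree comparators below build on.
 */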
/* return positive if n1 > n2 */
static signed int
cscan_tree_compare_nodes(void *context, const void *n1, const void *n2)
{
	const struct cscan_queue * const q = context;
	const struct buf * const b1 = n1;
	const struct buf * const b2 = n2;
	const int sortby = q->cq_sortby;
	const int diff = buf_cmp(b1, b2, sortby);

	if (diff != 0) {
		return diff;
	}

	/*
	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
	 */
	if (b1 > b2) {
		return 1;
	}
	if (b1 < b2) {
		return -1;
	}
	return 0;
}

/* return positive if n1 > k2 */
static signed int
cscan_tree_compare_key(void *context, const void *n1, const void *k2)
{
	const struct cscan_queue * const q = context;
	const struct buf * const b1 = n1;
	const struct cscan_key * const key = k2;
	const struct buf tmp = {
		.b_rawblkno = key->k_rawblkno,
		.b_cylinder = key->k_cylinder,
	};
	const struct buf *b2 = &tmp;
	const int sortby = q->cq_sortby;

	return buf_cmp(b1, b2, sortby);
}

static void __unused
cscan_dump(struct cscan_queue *cq)
{
	const int sortby = cq->cq_sortby;
	struct buf *bp;

	RB_TREE_FOREACH(bp, &cq->cq_buffers) {
		if (sortby == BUFQ_SORT_RAWBLOCK) {
			printf(" %jd", (intmax_t)bp->b_rawblkno);
		} else {
			printf(" %jd/%jd",
			    (intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno);
		}
	}
}

static inline bool
cscan_empty(struct cscan_queue *q)
{

	/* XXX this might do more work than necessary */
	return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL;
}

static void
cscan_put(struct cscan_queue *q, struct buf *bp)
{
	struct buf *obp __diagused;

	obp = rb_tree_insert_node(&q->cq_buffers, bp);
	KASSERT(obp == bp); /* see cscan_tree_compare_nodes */
}

static struct buf *
cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key)
{
	struct buf *bp;

	bp = rb_tree_find_node_geq(&q->cq_buffers, key);
	KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0);
	if (bp == NULL) {
		bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT);
		KDASSERT(cscan_tree_compare_key(q, bp, key) < 0);
	}
	if (bp != NULL && remove) {
#if defined(DEBUG)
		struct buf *nbp;
#endif /* defined(DEBUG) */

		rb_tree_remove_node(&q->cq_buffers, bp);
		/*
		 * remember the head position.
		 */
		key->k_cylinder = bp->b_cylinder;
		key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
#if defined(DEBUG)
		nbp = rb_tree_find_node_geq(&q->cq_buffers, key);
		if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) {
			panic("%s: wrong order %p < %p\n", __func__,
			    nbp, bp);
		}
#endif /* defined(DEBUG) */
	}
	return bp;
}
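/*
 * to illustrate the cyclical scan implemented by cscan_get() (example
 * values only): if cq_buffers holds requests at raw blocks 10, 40 and
 * 70 and the current key is 50, rb_tree_find_node_geq() returns the
 * request at 70.  once no request remains at or beyond the key, the
 * scan wraps around to the leftmost node, i.e. the request at 10.
 * after each removal the key is advanced to just past the request
 * served, so the head keeps sweeping in one direction.
 */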
static void
cscan_init(struct cscan_queue *q, int sortby)
{
	static const rb_tree_ops_t cscan_tree_ops = {
		.rbto_compare_nodes = cscan_tree_compare_nodes,
		.rbto_compare_key = cscan_tree_compare_key,
		.rbto_node_offset = offsetof(struct buf, b_u.u_rbnode),
		.rbto_context = NULL,
	};

	q->cq_sortby = sortby;
	/* XXX copy ops to workaround rbtree.h API limitation */
	q->cq_ops = cscan_tree_ops;
	q->cq_ops.rbto_context = q;
	rb_tree_init(&q->cq_buffers, &q->cq_ops);
}

/*
 * Per-priority CSCAN.
 *
 * XXX probably we should have a way to raise
 * the priority of on-queue requests.
 */
#define	PRIOCSCAN_NQUEUE	3

struct priocscan_queue {
	struct cscan_queue q_queue;
	unsigned int q_burst;
};

struct bufq_priocscan {
	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];

#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	/*
	 * XXX using a "global" head position can reduce positioning time
	 * when switching between queues, although it might affect fairness.
	 */
	struct cscan_key bq_lastkey;
#endif
};

/*
 * how many requests to serve when there are pending requests
 * on other queues.
 *
 * XXX tune
 * be careful: while making these values larger likely
 * increases the total throughput, it can also increase latencies
 * for some workloads.
 */
const int priocscan_burst[] = {
	64, 16, 4
};
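/*
 * for example, with the burst counts above, if all three queues stay
 * non-empty, one full cycle serves about 64 time-critical, 16
 * time-limited and 4 non-critical requests before the counts are
 * reset in bufq_priocscan_get(), i.e. roughly a 64:16:4 bandwidth
 * split between the priority levels.
 */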
static void bufq_priocscan_init(struct bufq_state *);
static void bufq_priocscan_put(struct bufq_state *, struct buf *);
static struct buf *bufq_priocscan_get(struct bufq_state *, int);

BUFQ_DEFINE(priocscan, 40, bufq_priocscan_init);

static inline struct cscan_queue *bufq_priocscan_selectqueue(
    struct bufq_priocscan *, const struct buf *);

static inline struct cscan_queue *
bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
{
	static const int priocscan_priomap[] = {
		[BPRIO_TIMENONCRITICAL] = 2,
		[BPRIO_TIMELIMITED] = 1,
		[BPRIO_TIMECRITICAL] = 0
	};

	return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
}

static void
bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan *q = bufq_private(bufq);
	struct cscan_queue *cq;

	cq = bufq_priocscan_selectqueue(q, bp);
	cscan_put(cq, bp);
}

static struct buf *
bufq_priocscan_get(struct bufq_state *bufq, int remove)
{
	struct bufq_priocscan *q = bufq_private(bufq);
	struct priocscan_queue *pq, *npq;
	struct priocscan_queue *first; /* highest priority non-empty queue */
	const struct priocscan_queue *epq;
	struct buf *bp;
	bool single; /* true if there's only one non-empty queue */

	/*
	 * find the highest priority non-empty queue.
	 */
	pq = &q->bq_queue[0];
	epq = pq + PRIOCSCAN_NQUEUE;
	for (; pq < epq; pq++) {
		if (!cscan_empty(&pq->q_queue)) {
			break;
		}
	}
	if (pq == epq) {
		/*
		 * all our queues are empty.  there's nothing to serve.
		 */
		return NULL;
	}
	first = pq;

	/*
	 * scan the rest of the queues.
	 *
	 * if we have two or more non-empty queues, we serve the highest
	 * priority one with a non-zero burst count.
	 */
	single = true;
	for (npq = pq + 1; npq < epq; npq++) {
		if (!cscan_empty(&npq->q_queue)) {
			/*
			 * we found another non-empty queue.
			 * it means that a queue needs to consume its burst
			 * count to be served.
			 */
			single = false;

			/*
			 * check if our current candidate queue has already
			 * exhausted its burst count.
			 */
			if (pq->q_burst > 0) {
				break;
			}
			pq = npq;
		}
	}
	if (single) {
		/*
		 * there's only one non-empty queue.
		 * just serve it without consuming its burst count.
		 */
		KASSERT(pq == first);
	} else {
		/*
		 * there are two or more non-empty queues.
		 */
		if (pq->q_burst == 0) {
			/*
			 * no queue can be served because all of them have
			 * exhausted their burst counts.
			 */
			unsigned int i;
#ifdef DEBUG
			for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
				pq = &q->bq_queue[i];
				if (!cscan_empty(&pq->q_queue) && pq->q_burst) {
					panic("%s: inconsistent", __func__);
				}
			}
#endif /* DEBUG */
			/*
			 * reset burst counts.
			 */
			if (remove) {
				for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
					pq = &q->bq_queue[i];
					pq->q_burst = priocscan_burst[i];
				}
			}

			/*
			 * serve the highest priority non-empty queue.
			 */
			pq = first;
		}
		/*
		 * consume the burst count.
		 *
		 * XXX account only by number of requests.  is it good enough?
		 */
		if (remove) {
			KASSERT(pq->q_burst > 0);
			pq->q_burst--;
		}
	}

	/*
	 * finally, get a request from the selected queue.
	 */
	KDASSERT(!cscan_empty(&pq->q_queue));
	bp = cscan_get(&pq->q_queue, remove,
#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	    &q->bq_lastkey
#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	    &pq->q_queue.cq_lastkey
#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	    );
	KDASSERT(bp != NULL);
	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));

	return bp;
}
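/*
 * how a disk driver typically ends up here: a minimal sketch of the
 * bufq(9) interface (the surrounding softc and error handling are
 * hypothetical and omitted):
 *
 *	struct bufq_state *bq;
 *	int error;
 *
 *	error = bufq_alloc(&bq, "priocscan",
 *	    BUFQ_SORT_RAWBLOCK | BUFQ_EXACT);
 *	...
 *	BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 *	bufq_put(bq, bp);	// lands in bufq_priocscan_put
 *	...
 *	bp = bufq_get(bq);	// served in priocscan order
 */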
static struct buf *
bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan * const q = bufq_private(bufq);
	unsigned int i;

	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
		struct buf *it;

		/*
		 * XXX probably could be faster but the cancel functionality
		 * is not widely used anyway.
		 */
		RB_TREE_FOREACH(it, &cq->cq_buffers) {
			if (it == bp) {
				rb_tree_remove_node(&cq->cq_buffers, bp);
				return bp;
			}
		}
	}
	return NULL;
}
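/*
 * note that cancel can't simply do an O(log n) rb_tree_find_node()
 * lookup keyed on the buffer: as cscan_tree_compare_nodes() notes,
 * rawblkno/cylinder are not necessarily unique, so a key match would
 * not be guaranteed to be this particular buf.  hence the linear walk
 * over each per-priority tree.
 */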
static void
bufq_priocscan_fini(struct bufq_state *bufq)
{

	KASSERT(bufq->bq_private != NULL);
	kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan));
}

static void
bufq_priocscan_init(struct bufq_state *bufq)
{
	struct bufq_priocscan *q;
	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
	unsigned int i;

	bufq->bq_get = bufq_priocscan_get;
	bufq->bq_put = bufq_priocscan_put;
	bufq->bq_cancel = bufq_priocscan_cancel;
	bufq->bq_fini = bufq_priocscan_fini;
	bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP);

	q = bufq->bq_private;
	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue *cq = &q->bq_queue[i].q_queue;

		cscan_init(cq, sortby);
	}
}

MODULE(MODULE_CLASS_BUFQ, bufq_priocscan, NULL);

static int
bufq_priocscan_modcmd(modcmd_t cmd, void *opaque)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return bufq_register(&bufq_strat_priocscan);
	case MODULE_CMD_FINI:
		return bufq_unregister(&bufq_strat_priocscan);
	default:
		return ENOTTY;
	}
}