/*	$NetBSD: bufq_priocscan.c,v 1.18 2014/01/28 12:50:54 martin Exp $	*/

/*-
 * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.18 2014/01/28 12:50:54 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/bufq_impl.h>
#include <sys/kmem.h>
#include <sys/rbtree.h>

#undef PRIOCSCAN_USE_GLOBAL_POSITION

/*
 * Cyclical scan (CSCAN)
 */
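/*
 * The scan sweeps in one direction only: requests are served in
 * ascending key order starting from the current head position, and
 * when no request remains at or beyond that position, the scan wraps
 * around to the lowest pending request and continues from there.
 * Pending requests are kept sorted in a red-black tree, so both
 * insertion and the "first request >= position" lookup are O(log n).
 */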

struct cscan_key {
	daddr_t k_rawblkno;
	int k_cylinder;
};

struct cscan_queue {
	rb_tree_t cq_buffers;		/* ordered list of buffers */
#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	struct cscan_key cq_lastkey;	/* scan position just past the last request */
#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	int cq_sortby;			/* BUFQ_SORT_MASK */
	rb_tree_ops_t cq_ops;
};

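/*
 * buf_cmp: a three-way comparator built on buf_inorder(); returns
 * positive if b1 sorts after b2, negative if it sorts before, and 0
 * if the two compare equal under the given sort criterion.
 */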
static signed int
buf_cmp(const struct buf *b1, const struct buf *b2, int sortby)
{

	if (buf_inorder(b2, b1, sortby)) {
		return 1;	/* b1 > b2 */
	}
	if (buf_inorder(b1, b2, sortby)) {
		return -1;	/* b1 < b2 */
	}
	return 0;
}

/* return positive if n1 > n2 */
static signed int
cscan_tree_compare_nodes(void *context, const void *n1, const void *n2)
{
	const struct cscan_queue * const q = context;
	const struct buf * const b1 = n1;
	const struct buf * const b2 = n2;
	const int sortby = q->cq_sortby;
	const int diff = buf_cmp(b1, b2, sortby);

	if (diff != 0) {
		return diff;
	}

	/*
	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
	 */
	if (b1 > b2) {
		return 1;
	}
	if (b1 < b2) {
		return -1;
	}
	return 0;
}

/* return positive if n1 > k2 */
static signed int
cscan_tree_compare_key(void *context, const void *n1, const void *k2)
{
	const struct cscan_queue * const q = context;
	const struct buf * const b1 = n1;
	const struct cscan_key * const key = k2;
	const struct buf tmp = {
		.b_rawblkno = key->k_rawblkno,
		.b_cylinder = key->k_cylinder,
	};
	const struct buf *b2 = &tmp;
	const int sortby = q->cq_sortby;

	return buf_cmp(b1, b2, sortby);
}

static void __unused
cscan_dump(struct cscan_queue *cq)
{
	const int sortby = cq->cq_sortby;
	struct buf *bp;

	RB_TREE_FOREACH(bp, &cq->cq_buffers) {
		if (sortby == BUFQ_SORT_RAWBLOCK) {
			printf(" %jd", (intmax_t)bp->b_rawblkno);
		} else {
			printf(" %jd/%jd",
			    (intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno);
		}
	}
}

static inline bool
cscan_empty(struct cscan_queue *q)
{

	/* XXX this might do more work than necessary */
	return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL;
}

static void
cscan_put(struct cscan_queue *q, struct buf *bp)
{
	struct buf *obp __diagused;

	obp = rb_tree_insert_node(&q->cq_buffers, bp);
	KASSERT(obp == bp); /* see cscan_tree_compare_nodes */
}

static struct buf *
cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key)
{
	struct buf *bp;

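	/*
	 * Look for the first request at or past the current scan
	 * position.  If the sweep has already passed every pending
	 * request, wrap around to the lowest one; this wrap-around is
	 * what makes the scan cyclical.
	 */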
	bp = rb_tree_find_node_geq(&q->cq_buffers, key);
	KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0);
	if (bp == NULL) {
		bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT);
		KDASSERT(cscan_tree_compare_key(q, bp, key) < 0);
	}
	if (bp != NULL && remove) {
#if defined(DEBUG)
		struct buf *nbp;
#endif /* defined(DEBUG) */

		rb_tree_remove_node(&q->cq_buffers, bp);
		/*
		 * remember the head position.
		 */
		key->k_cylinder = bp->b_cylinder;
		key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
#if defined(DEBUG)
		nbp = rb_tree_find_node_geq(&q->cq_buffers, key);
		if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) {
			panic("%s: wrong order %p < %p\n", __func__,
			    nbp, bp);
		}
#endif /* defined(DEBUG) */
	}
	return bp;
}

static void
cscan_init(struct cscan_queue *q, int sortby)
{
	static const rb_tree_ops_t cscan_tree_ops = {
		.rbto_compare_nodes = cscan_tree_compare_nodes,
		.rbto_compare_key = cscan_tree_compare_key,
		.rbto_node_offset = offsetof(struct buf, b_u.u_rbnode),
		.rbto_context = NULL,
	};

	q->cq_sortby = sortby;
	/*
	 * XXX copy ops to work around an rbtree.h API limitation: the
	 * comparator context (rbto_context) must point at this queue,
	 * so the shared ops template cannot be used directly.
	 */
	q->cq_ops = cscan_tree_ops;
	q->cq_ops.rbto_context = q;
	rb_tree_init(&q->cq_buffers, &q->cq_ops);
}

/*
 * Per-priority CSCAN.
 *
 * XXX probably we should have a way to raise
 * priority of the on-queue requests.
 */
#define	PRIOCSCAN_NQUEUE	3
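
/*
 * Requests are distributed into PRIOCSCAN_NQUEUE cscan queues
 * according to their buffer priority (BIO_GETPRIO); within each queue
 * they are served in CSCAN order.  Burst counts bound how long a
 * higher priority queue may keep lower priority queues waiting.
 */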

struct priocscan_queue {
	struct cscan_queue q_queue;
	unsigned int q_burst;	/* requests remaining in the current burst */
};

struct bufq_priocscan {
	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];

#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	/*
	 * XXX using a "global" head position can reduce positioning time
	 * when switching between queues, although it might hurt fairness.
	 */
	struct cscan_key bq_lastkey;
#endif
};

/*
 * how many requests to serve from a queue while requests are pending
 * on the other queues.
 *
 * XXX tune
 * be careful: while making these values larger likely
 * increases the total throughput, it can also increase latencies
 * for some workloads.
 */
const int priocscan_burst[] = {
	64, 16, 4
};
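
/*
 * For example, with all three queues continuously backlogged, one
 * full cycle serves 64 time-critical, 16 time-limited and 4
 * non-critical requests before every burst count is reset, i.e.
 * roughly 64/84, 16/84 and 4/84 of the dispatched requests.
 */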

static void bufq_priocscan_init(struct bufq_state *);
static void bufq_priocscan_put(struct bufq_state *, struct buf *);
static struct buf *bufq_priocscan_get(struct bufq_state *, int);

BUFQ_DEFINE(priocscan, 40, bufq_priocscan_init);
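
/*
 * BUFQ_DEFINE registers this discipline under the name "priocscan" so
 * that bufq_alloc(9) can select it; 40 ranks it among the compiled-in
 * strategies when one is chosen by preference rather than by name.
 */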

static inline struct cscan_queue *bufq_priocscan_selectqueue(
    struct bufq_priocscan *, const struct buf *);

static inline struct cscan_queue *
bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
{
	static const int priocscan_priomap[] = {
		[BPRIO_TIMENONCRITICAL] = 2,
		[BPRIO_TIMELIMITED] = 1,
		[BPRIO_TIMECRITICAL] = 0
	};

	return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
}

static void
bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan *q = bufq->bq_private;
	struct cscan_queue *cq;

	cq = bufq_priocscan_selectqueue(q, bp);
	cscan_put(cq, bp);
}

static struct buf *
bufq_priocscan_get(struct bufq_state *bufq, int remove)
{
	struct bufq_priocscan *q = bufq->bq_private;
	struct priocscan_queue *pq, *npq;
	struct priocscan_queue *first; /* highest priority non-empty queue */
	const struct priocscan_queue *epq;
	struct buf *bp;
	bool single; /* true if there's only one non-empty queue */

	/*
	 * find the highest priority non-empty queue.
	 */
	pq = &q->bq_queue[0];
	epq = pq + PRIOCSCAN_NQUEUE;
	for (; pq < epq; pq++) {
		if (!cscan_empty(&pq->q_queue)) {
			break;
		}
	}
	if (pq == epq) {
		/*
		 * all our queues are empty.  there's nothing to serve.
		 */
		return NULL;
	}
	first = pq;

	/*
	 * scan the rest of the queues.
	 *
	 * if we have two or more non-empty queues, we serve the highest
	 * priority one with a non-zero burst count.
	 */
	single = true;
	for (npq = pq + 1; npq < epq; npq++) {
		if (!cscan_empty(&npq->q_queue)) {
			/*
			 * we found another non-empty queue.
			 * it means that a queue needs to consume its burst
			 * count to be served.
			 */
			single = false;

			/*
			 * check if our current candidate queue has already
			 * exhausted its burst count.
			 */
			if (pq->q_burst > 0) {
				break;
			}
			pq = npq;
		}
	}
	if (single) {
		/*
		 * there's only one non-empty queue.
		 * just serve it without consuming its burst count.
		 */
		KASSERT(pq == first);
	} else {
		/*
		 * there are two or more non-empty queues.
		 */
		if (pq->q_burst == 0) {
			/*
			 * no queue can be served; they have all
			 * exhausted their burst counts.
			 */
			unsigned int i;
#ifdef DEBUG
			for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
				pq = &q->bq_queue[i];
				if (!cscan_empty(&pq->q_queue) && pq->q_burst) {
					panic("%s: inconsistent", __func__);
				}
			}
#endif /* DEBUG */
			/*
			 * reset burst counts.
			 */
			if (remove) {
				for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
					pq = &q->bq_queue[i];
					pq->q_burst = priocscan_burst[i];
				}
			}

			/*
			 * serve the highest priority non-empty queue.
			 */
			pq = first;
		}
		/*
		 * consume the burst count.
		 *
		 * XXX account only by number of requests.  is it good enough?
		 */
		if (remove) {
			KASSERT(pq->q_burst > 0);
			pq->q_burst--;
		}
	}

	/*
	 * finally, get a request from the selected queue.
	 */
	KDASSERT(!cscan_empty(&pq->q_queue));
	bp = cscan_get(&pq->q_queue, remove,
#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
	    &q->bq_lastkey
#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	    &pq->q_queue.cq_lastkey
#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
	    );
	KDASSERT(bp != NULL);
	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));

	return bp;
}

static struct buf *
bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan * const q = bufq->bq_private;
	unsigned int i;

	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
		struct buf *it;

		/*
		 * XXX probably could be faster but the cancel functionality
		 * is not widely used anyway.
		 */
		RB_TREE_FOREACH(it, &cq->cq_buffers) {
			if (it == bp) {
				rb_tree_remove_node(&cq->cq_buffers, bp);
				return bp;
			}
		}
	}
	return NULL;
}

static void
bufq_priocscan_fini(struct bufq_state *bufq)
{

	KASSERT(bufq->bq_private != NULL);
	kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan));
}

static void
bufq_priocscan_init(struct bufq_state *bufq)
{
	struct bufq_priocscan *q;
	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
	unsigned int i;

	bufq->bq_get = bufq_priocscan_get;
	bufq->bq_put = bufq_priocscan_put;
	bufq->bq_cancel = bufq_priocscan_cancel;
	bufq->bq_fini = bufq_priocscan_fini;
	bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP);

	q = bufq->bq_private;
	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue *cq = &q->bq_queue[i].q_queue;

		cscan_init(cq, sortby);
	}
}