/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.10 2006/03/24 14:02:24 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */
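
/*
 * Overview of this implementation:
 *
 * - Resident pages live on four queues: a small "new" queue
 *   (CLOCKPRO_NEWQ) used to clear the initial reference, a cold queue,
 *   a hot queue and a list queue.  Which queue a page is on is encoded
 *   in pg->pqflags (see clockpro_setq/clockpro_getq below).
 *
 * - Per-page CLOCK-Pro state is also kept in pqflags: PQ_HOT marks hot
 *   pages, PQ_TEST marks cold pages in their test period, and
 *   PQ_SPECULATIVE marks read-ahead pages that have not been used yet.
 *
 * - The history of recently evicted (non-resident) pages is approximated
 *   by a hash of small fixed-size buckets of cookies; see
 *   nonresident_pagerecord() and nonresident_pagelookupremove().
 *
 * - handhot_advance() and handcold_advance() correspond to the hot and
 *   cold hands of the paper: the cold hand selects eviction victims and
 *   the hot hand demotes unreferenced hot pages.
 */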

/* #define	CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.10 2006/03/24 14:02:24 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define	dump(a)		/* nothing */

#undef	USEONCE2
#define	LISTQ
#undef	ADAPTIVE

#endif /* defined(PDSIM) */

#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define	CLOCKPRO_COLDPCTMAX	90

#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookup)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5
#define	PQ_SPECULATIVE	PQ_PRIVATE8

#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4

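/*
 * The index of the queue a page belongs to (CLOCKPRO_NOQUEUE, NEWQ, COLDQ,
 * HOTQ or LISTQ) is stored in the PQ_QMASK bits of pg->pqflags, scaled by
 * PQ_QFACTOR; the helpers below encode and decode it.
 */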
static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

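/*
 * Non-resident page history: instead of remembering evicted pages exactly,
 * we keep small fixed-size buckets of 32-bit cookies (hashed page
 * identities).  Each bucket is a tiny ring; b->cur is the next slot to
 * overwrite and b->cycle, compared against the global cycle_target,
 * bounds how long a cookie may stay before its test period expires
 * (see nonresident_rotate()).
 */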
#define	BUCKETSIZE	14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket static_bucket;
static struct bucket *buckets = &static_bucket;
static size_t hashsize = 1;

static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

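/*
 * (Re)size the non-resident hash so that it can hold roughly
 * clockpro_hashfactor times as many cookies as there are managed pages.
 * Note that the old buckets are simply freed, so any existing
 * non-resident records are discarded on resize.
 */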
static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets != &static_bucket) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

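/*
 * Catch the bucket up with the global clock hand (cycle_target): each step
 * invalidates the oldest slot.  A still-valid cookie being expired here
 * means that non-resident page was not re-faulted within its test period,
 * so the cold target is nudged down (nreshandhot).
 */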
static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}

static boolean_t
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return TRUE;
		}
	}
	return FALSE;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX object pointer is often freed and reused for unrelated object.
	 * for vnodes, it would be better to use something like
	 * a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static boolean_t
nonresident_pagelookupremove(struct vm_page *pg)
{
	boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	PDPOL_EVCNT_INCR(nreslookup);
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nresrecordobj);
	} else {
		PDPOL_EVCNT_INCR(nresrecordanon);
	}
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

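/*
 * Recompute the cold target and the new-queue length limit.  With
 * ADAPTIVE defined the target follows coldadj, clamped to
 * [1, CLOCKPRO_COLDPCTMAX% of the managed pages]; otherwise it is a fixed
 * percentage (s_coldtargetpct) of the managed pages.  The new queue is
 * kept to roughly a quarter of the cold target, but never below
 * CLOCKPRO_NEWQMIN.
 */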
static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}

static void
clockpro_movereferencebit(struct vm_page *pg)
{
	boolean_t referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}

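/*
 * Add a page to the clock.  In the default (!USEONCE2) case, a page whose
 * identity is still found in the non-resident hash re-enters as a hot page,
 * an unknown page enters as a cold page in its test period, and a
 * speculative (read-ahead) page enters cold with no test period.
 */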
static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	boolean_t hot;
	boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = FALSE;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider an mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page, which sets its reference bit.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

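/*
 * The hot hand: walk the hot queue until enough cold pages exist
 * (s_ncold reaches s_coldtarget).  Hot pages whose reference bit is clear
 * are demoted to cold; referenced ones stay hot.  The global cycle_target
 * is also advanced here, fractionally (BUCKETSIZE per hotqlen pages
 * examined), so that non-resident test periods expire at the rate the
 * hot hand moves.
 */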
static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we meet our target, stop at a hot page so that no cold
	 * pages in their test period have a larger recency than any hot
	 * page.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

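/*
 * The cold hand: produce the next eviction candidate.  Cold pages that
 * were referenced are promoted (to hot if they were in their test period,
 * otherwise they start a test period); unreferenced cold pages are
 * returned as victims, and those whose test period is still running are
 * recorded in the non-resident hash first so that a quick re-fault can be
 * detected later.
 */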
static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

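/*
 * The uvmpdpol_*() functions below are the page daemon policy hooks
 * called by the rest of UVM (and by the pdsim simulator).
 */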
void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~PQ_SPECULATIVE;
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

boolean_t
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}

static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

boolean_t
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return TRUE;
	}
	return FALSE;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage of the entire queue used as the cold target"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define	INITCOUNT()	\
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define	COUNT(pg)	\
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define	PRINTCOUNT(name)	\
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF("  %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */
1386