/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.3 2006/10/09 12:50:39 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */

/* #define	CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.3 2006/10/09 12:50:39 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define	dump(a)		/* nothing */

#undef USEONCE2
#define	LISTQ
#undef ADAPTIVE

#endif /* defined(PDSIM) */

#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define	CLOCKPRO_COLDPCTMAX	90

#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookup)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5
#define	PQ_SPECULATIVE	PQ_PRIVATE8
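
/*
 * A page's queue index is encoded in the three contiguous pqflags bits
 * selected by PQ_QMASK: qidx = (pqflags & PQ_QMASK) / PQ_QFACTOR.
 * This relies on PQ_PRIVATE5..7 being consecutive powers of two,
 * which the #error check above enforces.
 */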

#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4
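
/*
 * Queue roles: NEWQ holds newly enqueued pages until their initial
 * reference decays; COLDQ and HOTQ are the cold and hot clocks; LISTQ
 * (LISTQ configuration only) holds cold pages whose test period the hot
 * hand has ended.  CLOCKPRO_NOQUEUE means "not on any queue", so only
 * CLOCKPRO_NQUEUE (4) pageq_t's are needed and clockpro_queue() below
 * maps qidx 1..4 to s_q[0..3].
 */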

static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void __unused
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

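/*
 * Non-resident page tracking: evicted cold pages are remembered as
 * 32-bit cookies in a hash of small ring buffers ("buckets").  Each
 * bucket is itself a tiny clock of BUCKETSIZE slots; cycle_target is
 * the global clock time, advanced by the hot hand, and a bucket whose
 * local "cycle" lags behind cycle_target expires its oldest entries
 * when it is next touched (see nonresident_rotate()).
 */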
#define	BUCKETSIZE	14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket static_bucket;
static struct bucket *buckets = &static_bucket;
static size_t hashsize = 1;

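/*
 * coldadj accumulates feedback from test-period outcomes: a fault on a
 * page remembered in the non-resident hash argues for a larger cold
 * queue (+1), while a test period that expires unused argues for a
 * smaller one (-1).  The accumulated value is only applied in the
 * ADAPTIVE configuration; see clockpro_tune().
 */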
static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets != &static_bucket) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

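/*
 * Catch a bucket up with the global clock: each step invalidates the
 * slot under the bucket's cursor, so an entry that survives roughly one
 * full revolution of the hot hand is dropped.  A non-INVAL cookie
 * expiring here means a remembered page was never re-faulted in time;
 * this is counted as nreshandhot and shrinks the cold target.
 */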
static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}

static boolean_t
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return TRUE;
		}
	}
	return FALSE;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX the object pointer is often freed and reused for an
	 * unrelated object.  for vnodes, it would be better to use
	 * something like a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static boolean_t
nonresident_pagelookupremove(struct vm_page *pg)
{
	boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	PDPOL_EVCNT_INCR(nreslookup);
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nresrecordobj);
	} else {
		PDPOL_EVCNT_INCR(nresrecordanon);
	}
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

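/*
 * Recompute s_coldtarget: with ADAPTIVE, apply the accumulated coldadj
 * feedback, clamped to [1, CLOCKPRO_COLDPCTMAX% of npages]; otherwise
 * take a fixed percentage (coldtargetpct) of the resident pages.  The
 * new-page queue is sized to a quarter of that, with a floor of
 * CLOCKPRO_NEWQMIN.
 */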
static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}

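/*
 * Reference-bit handling: "move" harvests the pmap-level referenced
 * bit into the software PQ_REFERENCED flag (clearing the former);
 * "clear" discards both, giving the page a clean slate.
 */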
static void
clockpro_movereferencebit(struct vm_page *pg)
{
	boolean_t referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

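/*
 * Trim the new-page queue down to "len" entries.  Overflowing pages
 * have their initial reference stripped and join the tail of the cold
 * queue, i.e. the CLOCK-Pro list head.
 */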
static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}

static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	boolean_t hot;
	boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = FALSE;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider an mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page, which sets its reference bit.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

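/*
 * The hot hand: runs only while the cold page count is below target,
 * demoting hot pages whose reference bit is clear back to cold and
 * advancing the non-resident clock (cycle_target) as it goes.
 */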
static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate the test period of nonresident pages by cycling them:
	 * cycle_target advances by BUCKETSIZE/hotqlen per hot-hand step,
	 * so every bucket rotates through all of its slots over one full
	 * revolution of the hot hand.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we meet our target, stop at a hot page so that no cold
	 * pages in their test period have larger recency than any hot
	 * pages.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

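/*
 * The cold hand: scan the cold queue for an eviction victim.  A
 * referenced page in its test period is promoted to hot; a referenced
 * page outside it gets a test period; an unreferenced page is recorded
 * in the non-resident hash (if still in test) and returned to the
 * caller for eviction.
 */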
static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
#if defined(LISTQ)
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
#endif /* defined(LISTQ) */
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

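/*
 * uvmpdpol_* entry points: the hooks through which the UVM pagedaemon
 * and fault paths drive the policy.
 */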
void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~PQ_SPECULATIVE;
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

boolean_t
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}

static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

boolean_t
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return TRUE;
	}
	return FALSE;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define	INITCOUNT()	\
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define	COUNT(pg)	\
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define	PRINTCOUNT(name)	\
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
1330 printf("listq %p\n");
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
#if defined(DEBUG)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}
#endif /* defined(DEBUG) */

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */