uvm_pdpolicy_clockpro.c revision 1.1.2.6 1 /* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.6 2006/03/10 13:01:19 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * CLOCK-Pro replacement policy:
31 * http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
32 *
33 * approximation of the list of non-resident pages using hash:
34 * http://linux-mm.org/ClockProApproximation
35 */
36
37 /* #define CLOCKPRO_DEBUG */
38
39 #if defined(PDSIM)
40
41 #include "pdsim.h"
42
43 #else /* defined(PDSIM) */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.6 2006/03/10 13:01:19 yamt Exp $");
47
48 #include "opt_ddb.h"
49
50 #include <sys/param.h>
51 #include <sys/proc.h>
52 #include <sys/systm.h>
53 #include <sys/kernel.h>
54 #include <sys/vnode.h>
55 #include <sys/hash.h>
56
57 #include <uvm/uvm.h>
58 #include <uvm/uvm_pdpolicy_impl.h>
59
/*
 * Diagnostic helpers for the non-simulator build.
 * DPRINTF() expands to nothing; WARN() forwards its arguments to printf().
 * C99 variadic macros are used when available, the GCC named-variadic
 * extension otherwise.
 */
#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

/* dump() is compiled out in this configuration. */
#define	dump(a)		/* nothing */

/* compile-time selection of algorithm variants */
#undef USEONCE2
#define	LISTQ
#undef ADAPTIVE
73
74 #endif /* defined(PDSIM) */
75
/*
 * percentage of the queued pages used as the cold target by clockpro_tune()
 * when ADAPTIVE is not defined.  can be overridden at build time.
 */
#ifndef CLOCKPRO_COLDPCT
#define	CLOCKPRO_COLDPCT	10
#endif /* CLOCKPRO_COLDPCT */

/* upper bound (in percent) of the cold target when ADAPTIVE is defined */
#define	CLOCKPRO_COLDPCTMAX	90

/* default for clockpro_hashfactor.  can be overridden at build time. */
#ifndef CLOCKPRO_HASHFACTOR
#define	CLOCKPRO_HASHFACTOR	2
#endif /* CLOCKPRO_HASHFACTOR */

/* lower bound of s_newqlenmax: the number of pages in 1MB */
#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

/* the bucket count computed by clockpro_hashinit() is multiplied by this */
int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;
89
90 PDPOL_EVCNT_DEFINE(nresrecord)
91 PDPOL_EVCNT_DEFINE(nreslookup)
92 PDPOL_EVCNT_DEFINE(nresfoundobj)
93 PDPOL_EVCNT_DEFINE(nresfoundanon)
94 PDPOL_EVCNT_DEFINE(nresanonfree)
95 PDPOL_EVCNT_DEFINE(nresconflict)
96 PDPOL_EVCNT_DEFINE(nresoverwritten)
97 PDPOL_EVCNT_DEFINE(nreshandhot)
98
99 PDPOL_EVCNT_DEFINE(hhottakeover)
100 PDPOL_EVCNT_DEFINE(hhotref)
101 PDPOL_EVCNT_DEFINE(hhotunref)
102 PDPOL_EVCNT_DEFINE(hhotcold)
103 PDPOL_EVCNT_DEFINE(hhotcoldtest)
104
105 PDPOL_EVCNT_DEFINE(hcoldtakeover)
106 PDPOL_EVCNT_DEFINE(hcoldref)
107 PDPOL_EVCNT_DEFINE(hcoldunref)
108 PDPOL_EVCNT_DEFINE(hcoldreftest)
109 PDPOL_EVCNT_DEFINE(hcoldunreftest)
110 PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
111 PDPOL_EVCNT_DEFINE(hcoldhot)
112
113 PDPOL_EVCNT_DEFINE(speculativeenqueue)
114 PDPOL_EVCNT_DEFINE(speculativehit1)
115 PDPOL_EVCNT_DEFINE(speculativehit2)
116 PDPOL_EVCNT_DEFINE(speculativemiss)
117
/*
 * policy-private bits in pg->pqflags.
 */
#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
#define	PQ_SPECULATIVE	PQ_PRIVATE8

/*
 * PQ_PRIVATE5..7 form a 3-bit field holding the queue index
 * (see clockpro_setq/clockpro_getq), so they have to be adjacent bits.
 */
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5

/*
 * queue indices.  0 means "not on any queue".
 */
#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#ifdef LISTQ
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* LISTQ */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* LISTQ */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4
140
141 static inline void
142 clockpro_setq(struct vm_page *pg, int qidx)
143 {
144 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
145 KASSERT(qidx <= CLOCKPRO_NQUEUE);
146
147 pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
148 }
149
150 static inline int
151 clockpro_getq(struct vm_page *pg)
152 {
153 int qidx;
154
155 qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
156 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
157 KASSERT(qidx <= CLOCKPRO_NQUEUE);
158 return qidx;
159 }
160
161 typedef struct {
162 struct pglist q_q;
163 int q_len;
164 } pageq_t;
165
166 struct clockpro_state {
167 int s_npages;
168 int s_coldtarget;
169 int s_ncold;
170
171 int s_newqlenmax;
172 pageq_t s_q[CLOCKPRO_NQUEUE];
173 };
174
175 static pageq_t *
176 clockpro_queue(struct clockpro_state *s, int qidx)
177 {
178
179 KASSERT(CLOCKPRO_NOQUEUE < qidx);
180 KASSERT(qidx <= CLOCKPRO_NQUEUE);
181
182 return &s->s_q[qidx - 1];
183 }
184
#if !defined(LISTQ)

/*
 * 0 or 1.  decides which of queues 2 and 3 is CLOCKPRO_COLDQ and which is
 * CLOCKPRO_HOTQ.
 */
static int coldqidx;

/*
 * clockpro_switchqueue: swap the roles of the cold and hot queues.
 */
static void
clockpro_switchqueue(void)
{

	coldqidx ^= 1;
}

#endif /* !defined(LISTQ) */
197
198 static struct clockpro_state clockpro;
199 static struct clockpro_scanstate {
200 int ss_nscanned;
201 } scanstate;
202
203 /* ---------------------------------------- */
204
205 static void
206 pageq_init(pageq_t *q)
207 {
208
209 TAILQ_INIT(&q->q_q);
210 q->q_len = 0;
211 }
212
213 static int
214 pageq_len(const pageq_t *q)
215 {
216
217 return q->q_len;
218 }
219
220 static struct vm_page *
221 pageq_first(const pageq_t *q)
222 {
223
224 return TAILQ_FIRST(&q->q_q);
225 }
226
227 static void
228 pageq_insert_tail(pageq_t *q, struct vm_page *pg)
229 {
230
231 TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
232 q->q_len++;
233 }
234
235 static void
236 pageq_insert_head(pageq_t *q, struct vm_page *pg)
237 {
238
239 TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
240 q->q_len++;
241 }
242
243 static void
244 pageq_remove(pageq_t *q, struct vm_page *pg)
245 {
246
247 #if 1
248 KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
249 #endif
250 KASSERT(q->q_len > 0);
251 TAILQ_REMOVE(&q->q_q, pg, pageq);
252 q->q_len--;
253 }
254
255 static struct vm_page *
256 pageq_remove_head(pageq_t *q)
257 {
258 struct vm_page *pg;
259
260 pg = TAILQ_FIRST(&q->q_q);
261 if (pg == NULL) {
262 KASSERT(q->q_len == 0);
263 return NULL;
264 }
265 pageq_remove(q, pg);
266 return pg;
267 }
268
269 /* ---------------------------------------- */
270
271 static void
272 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
273 {
274 pageq_t *q = clockpro_queue(s, qidx);
275
276 clockpro_setq(pg, qidx);
277 pageq_insert_tail(q, pg);
278 }
279
280 static void
281 clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
282 {
283 pageq_t *q = clockpro_queue(s, qidx);
284
285 clockpro_setq(pg, qidx);
286 pageq_insert_head(q, pg);
287 }
288
289 /* ---------------------------------------- */
290
/* what is remembered about a non-resident page; see calccookie() */
typedef uint32_t nonres_cookie_t;
/* value of an unused slot.  calccookie() never returns this. */
#define	NONRES_COOKIE_INVAL	0

/* identity of the owner of a page; see pageobj() */
typedef uintptr_t objid_t;
295
296 /*
297 * XXX maybe these hash functions need reconsideration,
298 * given that hash distribution is critical here.
299 */
300
301 static uint32_t
302 pageidentityhash1(objid_t obj, off_t idx)
303 {
304 uint32_t hash = HASH32_BUF_INIT;
305
306 #if 1
307 hash = hash32_buf(&idx, sizeof(idx), hash);
308 hash = hash32_buf(&obj, sizeof(obj), hash);
309 #else
310 hash = hash32_buf(&obj, sizeof(obj), hash);
311 hash = hash32_buf(&idx, sizeof(idx), hash);
312 #endif
313 return hash;
314 }
315
316 static uint32_t
317 pageidentityhash2(objid_t obj, off_t idx)
318 {
319 uint32_t hash = HASH32_BUF_INIT;
320
321 hash = hash32_buf(&obj, sizeof(obj), hash);
322 hash = hash32_buf(&idx, sizeof(idx), hash);
323 return hash;
324 }
325
326 static nonres_cookie_t
327 calccookie(objid_t obj, off_t idx)
328 {
329 uint32_t hash = pageidentityhash2(obj, idx);
330 nonres_cookie_t cookie = hash;
331
332 if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
333 cookie++; /* XXX */
334 }
335 return cookie;
336 }
337
338 #define BUCKETSIZE 16
339 struct bucket {
340 int cycle;
341 int cur;
342 nonres_cookie_t pages[BUCKETSIZE];
343 };
344 static int cycle_target;
345 static int cycle_target_frac;
346
347 static struct bucket *buckets;
348 static size_t hashsize;
349
350 static int coldadj;
351 #define COLDTARGET_ADJ(d) coldadj += (d)
352
353 #if defined(PDSIM)
354
355 static void *
356 clockpro_hashalloc(int n)
357 {
358 size_t allocsz = sizeof(*buckets) * n;
359
360 return malloc(allocsz);
361 }
362
/*
 * clockpro_hashfree: release memory obtained from clockpro_hashalloc().
 * "n" is not used by this variant.
 */
static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}
369
370 #else /* defined(PDSIM) */
371
372 static void *
373 clockpro_hashalloc(int n)
374 {
375 size_t allocsz = round_page(sizeof(*buckets) * n);
376
377 return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
378 }
379
380 static void
381 clockpro_hashfree(void *p, int n)
382 {
383 size_t allocsz = round_page(sizeof(*buckets) * n);
384
385 uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
386 }
387
388 #endif /* defined(PDSIM) */
389
390 static void
391 clockpro_hashinit(uint64_t n)
392 {
393 struct bucket *newbuckets;
394 struct bucket *oldbuckets;
395 size_t sz;
396 size_t oldsz;
397 int i;
398
399 sz = howmany(n, BUCKETSIZE);
400 sz *= clockpro_hashfactor;
401 newbuckets = clockpro_hashalloc(sz);
402 if (newbuckets == NULL) {
403 panic("%s: allocation failure", __func__);
404 }
405 for (i = 0; i < sz; i++) {
406 struct bucket *b = &newbuckets[i];
407 int j;
408
409 b->cycle = cycle_target;
410 b->cur = 0;
411 for (j = 0; j < BUCKETSIZE; j++) {
412 b->pages[j] = NONRES_COOKIE_INVAL;
413 }
414 }
415 /* XXX lock */
416 oldbuckets = buckets;
417 oldsz = hashsize;
418 buckets = newbuckets;
419 hashsize = sz;
420 /* XXX unlock */
421 if (oldbuckets) {
422 clockpro_hashfree(oldbuckets, oldsz);
423 }
424 }
425
426 static struct bucket *
427 nonresident_getbucket(objid_t obj, off_t idx)
428 {
429 uint32_t hash;
430 static struct bucket static_bucket;
431
432 if (hashsize == 0) {
433 return &static_bucket;
434 }
435
436 hash = pageidentityhash1(obj, idx);
437 return &buckets[hash % hashsize];
438 }
439
440 static void
441 nonresident_rotate(struct bucket *b)
442 {
443
444 while (b->cycle - cycle_target < 0) {
445 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
446 PDPOL_EVCNT_INCR(nreshandhot);
447 COLDTARGET_ADJ(-1);
448 }
449 b->pages[b->cur] = NONRES_COOKIE_INVAL;
450 b->cur = (b->cur + 1) % BUCKETSIZE;
451 b->cycle++;
452 }
453 }
454
455 static boolean_t
456 nonresident_lookupremove(objid_t obj, off_t idx)
457 {
458 struct bucket *b = nonresident_getbucket(obj, idx);
459 nonres_cookie_t cookie = calccookie(obj, idx);
460 int i;
461
462 nonresident_rotate(b);
463 for (i = 0; i < BUCKETSIZE; i++) {
464 if (b->pages[i] == cookie) {
465 b->pages[i] = NONRES_COOKIE_INVAL;
466 return TRUE;
467 }
468 }
469 return FALSE;
470 }
471
472 static objid_t
473 pageobj(struct vm_page *pg)
474 {
475 const void *obj;
476
477 /*
478 * XXX object pointer is often freed and reused for unrelated object.
479 * for vnodes, it would be better to use something like
480 * a hash of fsid/fileid/generation.
481 */
482
483 obj = pg->uobject;
484 if (obj == NULL) {
485 obj = pg->uanon;
486 KASSERT(obj != NULL);
487 KASSERT(pg->offset == 0);
488 }
489
490 return (objid_t)obj;
491 }
492
493 static off_t
494 pageidx(struct vm_page *pg)
495 {
496
497 KASSERT((pg->offset & PAGE_MASK) == 0);
498 return pg->offset >> PAGE_SHIFT;
499 }
500
501 static boolean_t
502 nonresident_pagelookupremove(struct vm_page *pg)
503 {
504 boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));
505
506 PDPOL_EVCNT_INCR(nreslookup);
507 if (found) {
508 if (pg->uobject) {
509 PDPOL_EVCNT_INCR(nresfoundobj);
510 } else {
511 PDPOL_EVCNT_INCR(nresfoundanon);
512 }
513 }
514 return found;
515 }
516
517 static void
518 nonresident_pagerecord(struct vm_page *pg)
519 {
520 objid_t obj = pageobj(pg);
521 off_t idx = pageidx(pg);
522 struct bucket *b = nonresident_getbucket(obj, idx);
523 nonres_cookie_t cookie = calccookie(obj, idx);
524
525 #if defined(DEBUG)
526 int i;
527
528 for (i = 0; i < BUCKETSIZE; i++) {
529 if (b->pages[i] == cookie) {
530 PDPOL_EVCNT_INCR(nresconflict);
531 }
532 }
533 #endif /* defined(DEBUG) */
534
535 PDPOL_EVCNT_INCR(nresrecord);
536 nonresident_rotate(b);
537 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
538 PDPOL_EVCNT_INCR(nresoverwritten);
539 COLDTARGET_ADJ(-1);
540 }
541 b->pages[b->cur] = cookie;
542 b->cur = (b->cur + 1) % BUCKETSIZE;
543 }
544
545 /* ---------------------------------------- */
546
/*
 * check_sanity: hook for consistency checks.
 * an empty function if CLOCKPRO_DEBUG, otherwise compiled out.
 */
#ifdef CLOCKPRO_DEBUG
static void
check_sanity(void)
{

	/* nothing */
}
#else /* CLOCKPRO_DEBUG */
#define	check_sanity()	/* nothing */
#endif /* CLOCKPRO_DEBUG */
555
556 static void
557 clockpro_reinit(void)
558 {
559
560 clockpro_hashinit(uvmexp.npages);
561 }
562
563 static void
564 clockpro_init(void)
565 {
566 struct clockpro_state *s = &clockpro;
567 int i;
568
569 for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
570 pageq_init(&s->s_q[i]);
571 }
572 s->s_newqlenmax = 1;
573 s->s_coldtarget = 1;
574 }
575
576 static void
577 clockpro_tune(void)
578 {
579 struct clockpro_state *s = &clockpro;
580 int coldtarget;
581
582 #if defined(ADAPTIVE)
583 int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
584 int coldmin = 1;
585
586 coldtarget = s->s_coldtarget;
587 if (coldtarget + coldadj < coldmin) {
588 coldadj = coldmin - coldtarget;
589 } else if (coldtarget + coldadj > coldmax) {
590 coldadj = coldmax - coldtarget;
591 }
592 coldtarget += coldadj;
593 #else /* defined(ADAPTIVE) */
594 coldtarget = s->s_npages * CLOCKPRO_COLDPCT / 100;
595 if (coldtarget < 1) {
596 coldtarget = 1;
597 }
598 #endif /* defined(ADAPTIVE) */
599
600 s->s_coldtarget = coldtarget;
601 s->s_newqlenmax = coldtarget / 4;
602 if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
603 s->s_newqlenmax = CLOCKPRO_NEWQMIN;
604 }
605 }
606
607 static void
608 clockpro_movereferencebit(struct vm_page *pg)
609 {
610 boolean_t referenced;
611
612 referenced = pmap_clear_reference(pg);
613 if (referenced) {
614 pg->pqflags |= PQ_REFERENCED;
615 }
616 }
617
618 static void
619 clockpro_clearreferencebit(struct vm_page *pg)
620 {
621
622 clockpro_movereferencebit(pg);
623 pg->pqflags &= ~PQ_REFERENCED;
624 }
625
626 static void
627 clockpro___newqrotate(int len)
628 {
629 struct clockpro_state * const s = &clockpro;
630 pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
631 struct vm_page *pg;
632
633 while (pageq_len(newq) > len) {
634 pg = pageq_remove_head(newq);
635 KASSERT(pg != NULL);
636 KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
637 if ((pg->pqflags & PQ_INITIALREF) != 0) {
638 clockpro_clearreferencebit(pg);
639 pg->pqflags &= ~PQ_INITIALREF;
640 }
641 /* place at the list head */
642 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
643 }
644 }
645
646 static void
647 clockpro_newqrotate(void)
648 {
649 struct clockpro_state * const s = &clockpro;
650
651 check_sanity();
652 clockpro___newqrotate(s->s_newqlenmax);
653 check_sanity();
654 }
655
/*
 * clockpro_newqflush: trim the new queue down to "n" pages.
 */
static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}
664
665 static void
666 clockpro_newqflushone(void)
667 {
668 struct clockpro_state * const s = &clockpro;
669
670 clockpro_newqflush(
671 MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
672 }
673
674 /*
675 * our "tail" is called "list-head" in the paper.
676 */
677
678 static void
679 clockpro___enqueuetail(struct vm_page *pg)
680 {
681 struct clockpro_state * const s = &clockpro;
682
683 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
684
685 check_sanity();
686 #if !defined(USEONCE2)
687 clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
688 clockpro_newqrotate();
689 #else /* !defined(USEONCE2) */
690 #if defined(LISTQ)
691 KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
692 #endif /* defined(LISTQ) */
693 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
694 #endif /* !defined(USEONCE2) */
695 check_sanity();
696 }
697
698 static void
699 clockpro_pageenqueue(struct vm_page *pg)
700 {
701 struct clockpro_state * const s = &clockpro;
702 boolean_t hot;
703 boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */
704
705 KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
706 UVM_LOCK_ASSERT_PAGEQ();
707 check_sanity();
708 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
709 s->s_npages++;
710 pg->pqflags &= ~(PQ_HOT|PQ_TEST);
711 if (speculative) {
712 hot = FALSE;
713 PDPOL_EVCNT_INCR(speculativeenqueue);
714 } else {
715 hot = nonresident_pagelookupremove(pg);
716 if (hot) {
717 COLDTARGET_ADJ(1);
718 }
719 }
720
721 /*
722 * consider mmap'ed file:
723 *
724 * - read-ahead enqueues a page.
725 *
726 * - on the following read-ahead hit, the fault handler activates it.
727 *
728 * - finally, the userland code which caused the above fault
729 * actually accesses the page. it makes its reference bit set.
730 *
731 * we want to count the above as a single access, rather than
732 * three accesses with short reuse distances.
733 */
734
735 #if defined(USEONCE2)
736 pg->pqflags &= ~PQ_INITIALREF;
737 if (hot) {
738 pg->pqflags |= PQ_TEST;
739 }
740 s->s_ncold++;
741 clockpro_clearreferencebit(pg);
742 clockpro___enqueuetail(pg);
743 #else /* defined(USEONCE2) */
744 if (speculative) {
745 s->s_ncold++;
746 } else if (hot) {
747 pg->pqflags |= PQ_HOT;
748 } else {
749 pg->pqflags |= PQ_TEST;
750 s->s_ncold++;
751 }
752 clockpro___enqueuetail(pg);
753 #endif /* defined(USEONCE2) */
754 KASSERT(s->s_ncold <= s->s_npages);
755 }
756
757 static pageq_t *
758 clockpro_pagequeue(struct vm_page *pg)
759 {
760 struct clockpro_state * const s = &clockpro;
761 int qidx;
762
763 qidx = clockpro_getq(pg);
764 KASSERT(qidx != CLOCKPRO_NOQUEUE);
765
766 return clockpro_queue(s, qidx);
767 }
768
769 static void
770 clockpro_pagedequeue(struct vm_page *pg)
771 {
772 struct clockpro_state * const s = &clockpro;
773 pageq_t *q;
774
775 KASSERT(s->s_npages > 0);
776 check_sanity();
777 q = clockpro_pagequeue(pg);
778 pageq_remove(q, pg);
779 check_sanity();
780 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
781 if ((pg->pqflags & PQ_HOT) == 0) {
782 KASSERT(s->s_ncold > 0);
783 s->s_ncold--;
784 }
785 KASSERT(s->s_npages > 0);
786 s->s_npages--;
787 check_sanity();
788 }
789
790 static void
791 clockpro_pagerequeue(struct vm_page *pg)
792 {
793 struct clockpro_state * const s = &clockpro;
794 int qidx;
795
796 qidx = clockpro_getq(pg);
797 KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
798 pageq_remove(clockpro_queue(s, qidx), pg);
799 check_sanity();
800 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
801
802 clockpro___enqueuetail(pg);
803 }
804
805 static void
806 handhot_endtest(struct vm_page *pg)
807 {
808
809 KASSERT((pg->pqflags & PQ_HOT) == 0);
810 if ((pg->pqflags & PQ_TEST) != 0) {
811 PDPOL_EVCNT_INCR(hhotcoldtest);
812 COLDTARGET_ADJ(-1);
813 pg->pqflags &= ~PQ_TEST;
814 } else {
815 PDPOL_EVCNT_INCR(hhotcold);
816 }
817 }
818
819 static void
820 handhot_advance(void)
821 {
822 struct clockpro_state * const s = &clockpro;
823 struct vm_page *pg;
824 pageq_t *hotq;
825 int hotqlen;
826
827 clockpro_tune();
828
829 dump("hot called");
830 if (s->s_ncold >= s->s_coldtarget) {
831 return;
832 }
833 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
834 again:
835 pg = pageq_first(hotq);
836 if (pg == NULL) {
837 DPRINTF("%s: HHOT TAKEOVER\n", __func__);
838 dump("hhottakeover");
839 PDPOL_EVCNT_INCR(hhottakeover);
840 #if defined(LISTQ)
841 while (/* CONSTCOND */ 1) {
842 pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
843
844 pg = pageq_first(coldq);
845 if (pg == NULL) {
846 clockpro_newqflushone();
847 pg = pageq_first(coldq);
848 if (pg == NULL) {
849 WARN("hhot: no page?\n");
850 return;
851 }
852 }
853 KASSERT(clockpro_pagequeue(pg) == coldq);
854 pageq_remove(coldq, pg);
855 check_sanity();
856 if ((pg->pqflags & PQ_HOT) == 0) {
857 handhot_endtest(pg);
858 clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
859 } else {
860 clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
861 break;
862 }
863 }
864 #else /* defined(LISTQ) */
865 clockpro_newqflush(0); /* XXX XXX */
866 clockpro_switchqueue();
867 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
868 goto again;
869 #endif /* defined(LISTQ) */
870 }
871
872 KASSERT(clockpro_pagequeue(pg) == hotq);
873
874 /*
875 * terminate test period of nonresident pages by cycling them.
876 */
877
878 cycle_target_frac += BUCKETSIZE;
879 hotqlen = pageq_len(hotq);
880 while (cycle_target_frac >= hotqlen) {
881 cycle_target++;
882 cycle_target_frac -= hotqlen;
883 }
884
885 if ((pg->pqflags & PQ_HOT) == 0) {
886 #if defined(LISTQ)
887 panic("cold page in hotq: %p", pg);
888 #else /* defined(LISTQ) */
889 handhot_endtest(pg);
890 goto next;
891 #endif /* defined(LISTQ) */
892 }
893 KASSERT((pg->pqflags & PQ_TEST) == 0);
894 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
895 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
896
897 /*
898 * once we met our target,
899 * stop at a hot page so that no cold pages in test period
900 * have larger recency than any hot pages.
901 */
902
903 if (s->s_ncold >= s->s_coldtarget) {
904 dump("hot done");
905 return;
906 }
907 clockpro_movereferencebit(pg);
908 if ((pg->pqflags & PQ_REFERENCED) == 0) {
909 PDPOL_EVCNT_INCR(hhotunref);
910 uvmexp.pddeact++;
911 pg->pqflags &= ~PQ_HOT;
912 clockpro.s_ncold++;
913 KASSERT(s->s_ncold <= s->s_npages);
914 } else {
915 PDPOL_EVCNT_INCR(hhotref);
916 }
917 pg->pqflags &= ~PQ_REFERENCED;
918 #if !defined(LISTQ)
919 next:
920 #endif /* !defined(LISTQ) */
921 clockpro_pagerequeue(pg);
922 dump("hot");
923 goto again;
924 }
925
926 static struct vm_page *
927 handcold_advance(void)
928 {
929 struct clockpro_state * const s = &clockpro;
930 struct vm_page *pg;
931
932 for (;;) {
933 pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
934 pageq_t *coldq;
935
936 clockpro_newqrotate();
937 handhot_advance();
938 #if defined(LISTQ)
939 pg = pageq_first(listq);
940 if (pg != NULL) {
941 KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
942 KASSERT((pg->pqflags & PQ_TEST) == 0);
943 KASSERT((pg->pqflags & PQ_HOT) == 0);
944 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
945 pageq_remove(listq, pg);
946 check_sanity();
947 clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
948 goto gotcold;
949 }
950 #endif /* defined(LISTQ) */
951 check_sanity();
952 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
953 pg = pageq_first(coldq);
954 if (pg == NULL) {
955 clockpro_newqflushone();
956 pg = pageq_first(coldq);
957 }
958 if (pg == NULL) {
959 DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
960 dump("hcoldtakeover");
961 PDPOL_EVCNT_INCR(hcoldtakeover);
962 KASSERT(
963 pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
964 #if defined(LISTQ)
965 KASSERT(
966 pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
967 #else /* defined(LISTQ) */
968 clockpro_switchqueue();
969 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
970 pg = pageq_first(coldq);
971 #endif /* defined(LISTQ) */
972 }
973 if (pg == NULL) {
974 WARN("hcold: no page?\n");
975 return NULL;
976 }
977 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
978 if ((pg->pqflags & PQ_HOT) != 0) {
979 PDPOL_EVCNT_INCR(hcoldhot);
980 pageq_remove(coldq, pg);
981 clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
982 check_sanity();
983 KASSERT((pg->pqflags & PQ_TEST) == 0);
984 uvmexp.pdscans++;
985 continue;
986 }
987 #if defined(LISTQ)
988 gotcold:
989 #endif /* defined(LISTQ) */
990 KASSERT((pg->pqflags & PQ_HOT) == 0);
991 uvmexp.pdscans++;
992 clockpro_movereferencebit(pg);
993 if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
994 KASSERT((pg->pqflags & PQ_TEST) == 0);
995 if ((pg->pqflags & PQ_REFERENCED) != 0) {
996 PDPOL_EVCNT_INCR(speculativehit2);
997 pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
998 clockpro_pagedequeue(pg);
999 clockpro_pageenqueue(pg);
1000 continue;
1001 }
1002 PDPOL_EVCNT_INCR(speculativemiss);
1003 }
1004 switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
1005 case PQ_TEST:
1006 PDPOL_EVCNT_INCR(hcoldunreftest);
1007 nonresident_pagerecord(pg);
1008 goto gotit;
1009 case 0:
1010 PDPOL_EVCNT_INCR(hcoldunref);
1011 gotit:
1012 KASSERT(s->s_ncold > 0);
1013 clockpro_pagerequeue(pg); /* XXX */
1014 dump("cold done");
1015 /* XXX "pg" is still in queue */
1016 handhot_advance();
1017 goto done;
1018
1019 case PQ_REFERENCED|PQ_TEST:
1020 PDPOL_EVCNT_INCR(hcoldreftest);
1021 s->s_ncold--;
1022 COLDTARGET_ADJ(1);
1023 pg->pqflags |= PQ_HOT;
1024 pg->pqflags &= ~PQ_TEST;
1025 break;
1026
1027 case PQ_REFERENCED:
1028 PDPOL_EVCNT_INCR(hcoldref);
1029 pg->pqflags |= PQ_TEST;
1030 break;
1031 }
1032 pg->pqflags &= ~PQ_REFERENCED;
1033 uvmexp.pdreact++;
1034 /* move to the list head */
1035 clockpro_pagerequeue(pg);
1036 dump("cold");
1037 }
1038 done:;
1039 return pg;
1040 }
1041
1042 void
1043 uvmpdpol_pageactivate(struct vm_page *pg)
1044 {
1045
1046 if (!uvmpdpol_pageisqueued_p(pg)) {
1047 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
1048 pg->pqflags |= PQ_INITIALREF;
1049 clockpro_pageenqueue(pg);
1050 } else if ((pg->pqflags & PQ_SPECULATIVE)) {
1051 PDPOL_EVCNT_INCR(speculativehit1);
1052 pg->pqflags &= ~PQ_SPECULATIVE;
1053 pg->pqflags |= PQ_INITIALREF;
1054 clockpro_pagedequeue(pg);
1055 clockpro_pageenqueue(pg);
1056 }
1057 pg->pqflags |= PQ_REFERENCED;
1058 }
1059
1060 void
1061 uvmpdpol_pagedeactivate(struct vm_page *pg)
1062 {
1063
1064 pg->pqflags &= ~PQ_REFERENCED;
1065 }
1066
1067 void
1068 uvmpdpol_pagedequeue(struct vm_page *pg)
1069 {
1070
1071 if (!uvmpdpol_pageisqueued_p(pg)) {
1072 return;
1073 }
1074 clockpro_pagedequeue(pg);
1075 pg->pqflags &= ~PQ_SPECULATIVE;
1076 }
1077
1078 void
1079 uvmpdpol_pageenqueue(struct vm_page *pg)
1080 {
1081
1082 #if 1
1083 if (uvmpdpol_pageisqueued_p(pg)) {
1084 return;
1085 }
1086 clockpro_clearreferencebit(pg);
1087 pg->pqflags |= PQ_SPECULATIVE;
1088 clockpro_pageenqueue(pg);
1089 #else
1090 uvmpdpol_pageactivate(pg);
1091 #endif
1092 }
1093
1094 void
1095 uvmpdpol_anfree(struct vm_anon *an)
1096 {
1097
1098 KASSERT(an->an_page == NULL);
1099 if (nonresident_lookupremove((objid_t)an, 0)) {
1100 PDPOL_EVCNT_INCR(nresanonfree);
1101 }
1102 }
1103
/*
 * uvmpdpol_init: initialize the policy.  see clockpro_init().
 */
void
uvmpdpol_init(void)
{
	/* sets up the queues and the initial targets. */
	clockpro_init();
}
1110
/*
 * uvmpdpol_reinit: resize the non-resident hash.  see clockpro_reinit().
 */
void
uvmpdpol_reinit(void)
{
	/* the hash is sized by the current uvmexp.npages. */
	clockpro_reinit();
}
1117
1118 void
1119 uvmpdpol_estimatepageable(int *active, int *inactive)
1120 {
1121 struct clockpro_state * const s = &clockpro;
1122
1123 if (active) {
1124 *active = s->s_npages - s->s_ncold;
1125 }
1126 if (inactive) {
1127 *inactive = s->s_ncold;
1128 }
1129 }
1130
1131 boolean_t
1132 uvmpdpol_pageisqueued_p(struct vm_page *pg)
1133 {
1134
1135 return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
1136 }
1137
1138 void
1139 uvmpdpol_scaninit(void)
1140 {
1141 struct clockpro_scanstate * const ss = &scanstate;
1142
1143 ss->ss_nscanned = 0;
1144 }
1145
1146 struct vm_page *
1147 uvmpdpol_selectvictim(void)
1148 {
1149 struct clockpro_state * const s = &clockpro;
1150 struct clockpro_scanstate * const ss = &scanstate;
1151 struct vm_page *pg;
1152
1153 if (ss->ss_nscanned > s->s_npages) {
1154 DPRINTF("scan too much\n");
1155 return NULL;
1156 }
1157 pg = handcold_advance();
1158 ss->ss_nscanned++;
1159 return pg;
1160 }
1161
1162 static void
1163 clockpro_dropswap(pageq_t *q, int *todo)
1164 {
1165 struct vm_page *pg;
1166
1167 TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
1168 if (*todo <= 0) {
1169 break;
1170 }
1171 if ((pg->pqflags & PQ_HOT) == 0) {
1172 continue;
1173 }
1174 if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
1175 continue;
1176 }
1177 if (uvmpd_trydropswap(pg)) {
1178 (*todo)--;
1179 }
1180 }
1181 }
1182
1183 void
1184 uvmpdpol_balancequeue(int swap_shortage)
1185 {
1186 struct clockpro_state * const s = &clockpro;
1187 int todo = swap_shortage;
1188
1189 if (todo == 0) {
1190 return;
1191 }
1192
1193 /*
1194 * reclaim swap slots from hot pages
1195 */
1196
1197 DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);
1198
1199 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
1200 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
1201 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
1202
1203 DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
1204 }
1205
1206 boolean_t
1207 uvmpdpol_needsscan_p(void)
1208 {
1209 struct clockpro_state * const s = &clockpro;
1210
1211 if (s->s_ncold < s->s_coldtarget) {
1212 return TRUE;
1213 }
1214 return FALSE;
1215 }
1216
/*
 * uvmpdpol_tune: recompute the targets.  see clockpro_tune().
 */
void
uvmpdpol_tune(void)
{
	/* updates s_coldtarget and s_newqlenmax. */
	clockpro_tune();
}
1223
/*
 * uvmpdpol_sysctlsetup: this policy creates no sysctl nodes.
 */
void
uvmpdpol_sysctlsetup(void)
{
	/* nothing */
}
1230
1231 #if defined(DDB)
1232
1233 void clockpro_dump(void);
1234
1235 void
1236 clockpro_dump(void)
1237 {
1238 struct clockpro_state * const s = &clockpro;
1239
1240 struct vm_page *pg;
1241 int ncold, nhot, ntest, nspeculative, ninitialref, nref;
1242 int newqlen, coldqlen, hotqlen, listqlen;
1243
1244 newqlen = coldqlen = hotqlen = listqlen = 0;
1245 printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
1246 s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);
1247
1248 #define INITCOUNT() \
1249 ncold = nhot = ntest = nspeculative = ninitialref = nref = 0
1250
1251 #define COUNT(pg) \
1252 if ((pg->pqflags & PQ_HOT) != 0) { \
1253 nhot++; \
1254 } else { \
1255 ncold++; \
1256 if ((pg->pqflags & PQ_TEST) != 0) { \
1257 ntest++; \
1258 } \
1259 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
1260 nspeculative++; \
1261 } \
1262 if ((pg->pqflags & PQ_INITIALREF) != 0) { \
1263 ninitialref++; \
1264 } else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
1265 pmap_is_referenced(pg)) { \
1266 nref++; \
1267 } \
1268 }
1269
1270 #define PRINTCOUNT(name) \
1271 printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
1272 "nref=%d\n", \
1273 (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)
1274
1275 INITCOUNT();
1276 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
1277 if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
1278 printf("newq corrupt %p\n", pg);
1279 }
1280 COUNT(pg)
1281 newqlen++;
1282 }
1283 PRINTCOUNT("newq");
1284
1285 INITCOUNT();
1286 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
1287 if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
1288 printf("coldq corrupt %p\n", pg);
1289 }
1290 COUNT(pg)
1291 coldqlen++;
1292 }
1293 PRINTCOUNT("coldq");
1294
1295 INITCOUNT();
1296 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
1297 if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
1298 printf("hotq corrupt %p\n", pg);
1299 }
1300 #if defined(LISTQ)
1301 if ((pg->pqflags & PQ_HOT) == 0) {
1302 printf("cold page in hotq: %p\n", pg);
1303 }
1304 #endif /* defined(LISTQ) */
1305 COUNT(pg)
1306 hotqlen++;
1307 }
1308 PRINTCOUNT("hotq");
1309
1310 INITCOUNT();
1311 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
1312 #if !defined(LISTQ)
1313 printf("listq %p\n");
1314 #endif /* !defined(LISTQ) */
1315 if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
1316 printf("listq corrupt %p\n", pg);
1317 }
1318 COUNT(pg)
1319 listqlen++;
1320 }
1321 PRINTCOUNT("listq");
1322
1323 printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
1324 newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
1325 coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
1326 hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
1327 listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
1328 }
1329
1330 #endif /* defined(DDB) */
1331
1332 #if defined(PDSIM)
1333 static void
1334 pdsim_dumpq(int qidx)
1335 {
1336 struct clockpro_state * const s = &clockpro;
1337 pageq_t *q = clockpro_queue(s, qidx);
1338 struct vm_page *pg;
1339
1340 TAILQ_FOREACH(pg, &q->q_q, pageq) {
1341 DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
1342 pg->offset >> PAGE_SHIFT,
1343 (pg->pqflags & PQ_HOT) ? "H" : "",
1344 (pg->pqflags & PQ_TEST) ? "T" : "",
1345 (pg->pqflags & PQ_REFERENCED) ? "R" : "",
1346 pmap_is_referenced(pg) ? "r" : "",
1347 (pg->pqflags & PQ_INITIALREF) ? "I" : "",
1348 (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
1349 );
1350 }
1351 }
1352
/*
 * pdsim_dump: print all queues on a single line, tagged with "id".
 *
 * the queues are shown in the order of list (L), hot (H), cold (C) and
 * new (N), followed by the cold page count, the cold target and
 * the pending target adjustment.  does nothing unless DEBUG.
 */
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF("  %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);

	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);

	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);

	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);

	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
1371 #endif /* defined(PDSIM) */
1372