/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.1 2006/03/06 12:53:44 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */
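
/*
 * rough overview of the implementation below (see the paper above for
 * the full description of the algorithm):
 *
 * - resident pages are either "hot" (PQ_HOT) or "cold"; a cold page
 *   which is still in its test period carries PQ_TEST.
 * - two hands scan the resident pages: "hand hot" (handhot_advance)
 *   demotes hot pages whose reference bit is clear back to cold, and
 *   "hand cold" (handcold_advance) picks eviction candidates and
 *   promotes cold pages which were referenced during their test period
 *   to hot.
 * - cold pages evicted during their test period are remembered as
 *   hashed cookies in the non-resident page hash (struct bucket below);
 *   re-faulting such a page is treated as a hot access and, when
 *   ADAPTIVE is defined, grows the cold target via COLDTARGET_ADJ.
 */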

/* #define	CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.1 2006/03/06 12:53:44 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define	dump(a)		/* nothing */

#undef	USEONCE2
#define	LISTQ
#undef	ADAPTIVE

#endif /* defined(PDSIM) */

#define	NEWQ

#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define	CLOCKPRO_COLDPCTMAX	90

#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecord)
PDPOL_EVCNT_DEFINE(nreslookup)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative);

PDPOL_EVCNT_DEFINE(speculativeenqueue);
PDPOL_EVCNT_DEFINE(speculativehit1);
PDPOL_EVCNT_DEFINE(speculativehit2);
PDPOL_EVCNT_DEFINE(speculativemiss);

#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5
#define	PQ_SPECULATIVE	PQ_PRIVATE8

#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4
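
/*
 * the queue index above (CLOCKPRO_NOQUEUE .. CLOCKPRO_NQUEUE) is kept in
 * pg->pqflags, in the three consecutive bits PQ_PRIVATE5..PQ_PRIVATE7.
 * clockpro_setq() shifts the index into that field by multiplying it by
 * PQ_QFACTOR (== PQ_PRIVATE5, the lowest bit of the field) and
 * clockpro_getq() divides it back out; the #error check above guards the
 * assumption that the three bits are adjacent.
 */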

static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */
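
/*
 * pageidentityhash1 selects the bucket and pageidentityhash2 provides the
 * cookie stored in it; mixing obj and idx in a different order presumably
 * keeps the two hashes independent, so that pages which collide on a
 * bucket do not automatically collide on a cookie as well.
 */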

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

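/*
 * non-resident page hash: an approximation of the list of recently
 * evicted pages.  each bucket is a small clock of BUCKETSIZE cookies
 * (hashed page identities); b->cur is the bucket-local hand and b->cycle
 * is compared against the global cycle_target, which hand hot advances,
 * so that stale entries are invalidated lazily by nonresident_rotate()
 * before a bucket is used.
 */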
#define	BUCKETSIZE	16
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket *buckets;
static size_t hashsize;

static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

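/*
 * size the hash so that it can remember roughly clockpro_hashfactor times
 * as many non-resident pages as there are managed pages:
 * howmany(n, BUCKETSIZE) * clockpro_hashfactor buckets of BUCKETSIZE
 * cookies each.  called via clockpro_reinit() with uvmexp.npages.
 */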
static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;
	static struct bucket static_bucket;

	if (hashsize == 0) {
		return &static_bucket;
	}

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}

static boolean_t
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return TRUE;
		}
	}
	return FALSE;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX the object pointer is often freed and reused for an
	 * unrelated object.  for vnodes, it would be better to use
	 * something like a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static boolean_t
nonresident_pagelookupremove(struct vm_page *pg)
{
	boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	PDPOL_EVCNT_INCR(nreslookup);
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

	PDPOL_EVCNT_INCR(nresrecord);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
}

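/*
 * compute the target number of cold pages and the new-queue length limit.
 * without ADAPTIVE, coldtarget is simply CLOCKPRO_COLDPCT percent of the
 * managed pages; e.g. with the default 10% and 65536 managed pages,
 * coldtarget becomes 6553 and newqlenmax 1638 (a quarter of coldtarget,
 * but never less than CLOCKPRO_NEWQMIN, i.e. 1MB worth of pages).
 */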
static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = s->s_npages * CLOCKPRO_COLDPCT / 100;
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}

static void
clockpro_movereferencebit(struct vm_page *pg)
{
	boolean_t referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */
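/*
 * i.e. clockpro___enqueuetail() inserts at the TAILQ tail and the hands
 * take pages from pageq_first(), so the TAILQ head is the oldest end of
 * each queue.
 */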

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if defined(NEWQ) && !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* defined(NEWQ) && !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* defined(NEWQ) && !defined(USEONCE2) */
	check_sanity();
}

static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	boolean_t hot;
	boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = FALSE;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider an mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page, which sets its reference bit.
	 *
	 * we want to count the above as a single access, rather than as
	 * three accesses with short reuse distances.
	 */
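
	/*
	 * the PQ_INITIALREF handling serves this purpose: the page waits in
	 * the new queue with PQ_INITIALREF set, and clockpro___newqrotate()
	 * clears both the flag and the reference bit when the page leaves
	 * that queue, so the references made while faulting the page in are
	 * not counted again by the hands.
	 */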

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

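	/*
	 * advance the non-resident hash clock: each hot page scanned here
	 * adds BUCKETSIZE/hotqlen to cycle_target (the remainder is carried
	 * in cycle_target_frac), so one full sweep of the hot queue advances
	 * cycle_target by BUCKETSIZE, i.e. rotates each bucket through all
	 * of its entries once.
	 */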
	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
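
		/*
		 * decide the fate of the cold page from its reference bit and
		 * test-period state:
		 *	unreferenced, in test	-> remember it in the
		 *				   non-resident hash and hand it
		 *				   out as an eviction candidate.
		 *	unreferenced, no test	-> just hand it out.
		 *	referenced, in test	-> short reuse distance;
		 *				   promote it to hot (and, with
		 *				   ADAPTIVE, grow the cold
		 *				   target).
		 *	referenced, no test	-> start its test period.
		 */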
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~PQ_SPECULATIVE;
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

boolean_t
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}

static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

boolean_t
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return TRUE;
	}
	return FALSE;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

void
uvmpdpol_sysctlsetup(void)
{
}

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define	INITCOUNT()	\
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define	COUNT(pg)	\
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define	PRINTCOUNT(name)	\
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */
