uvm_pdpolicy_clockpro.c revision 1.1.2.2 1 /* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.2 2006/03/07 13:41:02 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * CLOCK-Pro replacement policy:
31 * http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
32 *
33 * approximation of the list of non-resident pages using hash:
34 * http://linux-mm.org/ClockProApproximation
35 */
36
37 /* #define CLOCKPRO_DEBUG */
38
39 #if defined(PDSIM)
40
41 #include "pdsim.h"
42
43 #else /* defined(PDSIM) */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.2 2006/03/07 13:41:02 yamt Exp $");
47
48 #include "opt_ddb.h"
49
50 #include <sys/param.h>
51 #include <sys/proc.h>
52 #include <sys/systm.h>
53 #include <sys/kernel.h>
54 #include <sys/vnode.h>
55 #include <sys/hash.h>
56
57 #include <uvm/uvm.h>
58 #include <uvm/uvm_pdpolicy_impl.h>
59
/*
 * diagnostic output helpers for the non-PDSIM build.
 *
 * DPRINTF() and dump() expand to nothing; WARN() forwards its arguments
 * to printf().  the first branch uses C99 variadic macros, the second
 * the GNU named-variadic form.
 */
60 #if ((__STDC_VERSION__ - 0) >= 199901L)
61 #define DPRINTF(...) /* nothing */
62 #define WARN(...) printf(__VA_ARGS__)
63 #else /* ((__STDC_VERSION__ - 0) >= 199901L) */
64 #define DPRINTF(a...) /* nothing */ /* GCC */
65 #define WARN(a...) printf(a)
66 #endif /* ((__STDC_VERSION__ - 0) >= 199901L) */
67
68 #define dump(a) /* nothing */
69
/*
 * compile-time variant switches tested by #if defined(...) throughout
 * this file: only LISTQ is enabled in the non-PDSIM build.
 */
70 #undef USEONCE2
71 #define LISTQ
72 #undef ADAPTIVE
73
74 #endif /* defined(PDSIM) */
75
/*
 * CLOCKPRO_COLDPCT: percentage of the enqueued pages that clockpro_tune()
 * uses as the cold target when ADAPTIVE is not defined.  may be overridden
 * at build time.
 */
76 #if !defined(CLOCKPRO_COLDPCT)
77 #define CLOCKPRO_COLDPCT 10
78 #endif /* !defined(CLOCKPRO_COLDPCT) */
79
/* upper bound (in percent) on the cold target in the ADAPTIVE variant. */
80 #define CLOCKPRO_COLDPCTMAX 90
81
/*
 * CLOCKPRO_HASHFACTOR: multiplier applied to the bucket count computed in
 * clockpro_hashinit().  may be overridden at build time.
 */
82 #if !defined(CLOCKPRO_HASHFACTOR)
83 #define CLOCKPRO_HASHFACTOR 2
84 #endif /* !defined(CLOCKPRO_HASHFACTOR) */
85
/*
 * lower bound on s_newqlenmax applied by clockpro_tune(); the number of
 * pages in 1MB.
 */
86 #define CLOCKPRO_NEWQMIN ((1024 * 1024) >> PAGE_SHIFT) /* XXX */
87
/* run-time copy of the hash factor, read by clockpro_hashinit(). */
88 int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;
89
90 PDPOL_EVCNT_DEFINE(nresrecord)
91 PDPOL_EVCNT_DEFINE(nreslookup)
92 PDPOL_EVCNT_DEFINE(nresfoundobj)
93 PDPOL_EVCNT_DEFINE(nresfoundanon)
94 PDPOL_EVCNT_DEFINE(nresanonfree)
95 PDPOL_EVCNT_DEFINE(nresconflict)
96 PDPOL_EVCNT_DEFINE(nresoverwritten)
97 PDPOL_EVCNT_DEFINE(nreshandhot)
98
99 PDPOL_EVCNT_DEFINE(hhottakeover)
100 PDPOL_EVCNT_DEFINE(hhotref)
101 PDPOL_EVCNT_DEFINE(hhotunref)
102 PDPOL_EVCNT_DEFINE(hhotcold)
103 PDPOL_EVCNT_DEFINE(hhotcoldtest)
104
105 PDPOL_EVCNT_DEFINE(hcoldtakeover)
106 PDPOL_EVCNT_DEFINE(hcoldref)
107 PDPOL_EVCNT_DEFINE(hcoldunref)
108 PDPOL_EVCNT_DEFINE(hcoldreftest)
109 PDPOL_EVCNT_DEFINE(hcoldunreftest)
110 PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative);
111
112 PDPOL_EVCNT_DEFINE(speculativeenqueue);
113 PDPOL_EVCNT_DEFINE(speculativehit1);
114 PDPOL_EVCNT_DEFINE(speculativehit2);
115 PDPOL_EVCNT_DEFINE(speculativemiss);
116
/*
 * policy-private bits kept in pg->pqflags.
 *
 * PQ_REFERENCED: software copy of the reference bit
 *                (see clockpro_movereferencebit()).
 * PQ_HOT:        page is hot; pages without it are counted in s_ncold.
 * PQ_TEST:       cold page is in its test period.
 * PQ_INITIALREF: set by uvmpdpol_pageactivate() when it enqueues a page;
 *                cleared when the page leaves the new queue.
 */
117 #define PQ_REFERENCED PQ_PRIVATE1
118 #define PQ_HOT PQ_PRIVATE2
119 #define PQ_TEST PQ_PRIVATE3
120 #define PQ_INITIALREF PQ_PRIVATE4
/*
 * the queue index is stored as (qidx * PQ_QFACTOR) in the PQ_QMASK bits,
 * which only works if PQ_PRIVATE5..7 are three consecutive bits.
 */
121 #if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
122 #error PQ_PRIVATE
123 #endif
124 #define PQ_QMASK (PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
125 #define PQ_QFACTOR PQ_PRIVATE5
/* set by uvmpdpol_pageenqueue() on pages enqueued without being activated. */
126 #define PQ_SPECULATIVE PQ_PRIVATE8
127
/*
 * queue indices.  0 means "not on any queue"; indices 1..CLOCKPRO_NQUEUE
 * select s_q[qidx - 1].  without LISTQ the cold and hot indices swap
 * whenever coldqidx is toggled.
 */
128 #define CLOCKPRO_NOQUEUE 0
129 #define CLOCKPRO_NEWQ 1
130 #if defined(LISTQ)
131 #define CLOCKPRO_COLDQ 2
132 #define CLOCKPRO_HOTQ 3
133 #else /* defined(LISTQ) */
134 #define CLOCKPRO_COLDQ (2 + coldqidx) /* XXX */
135 #define CLOCKPRO_HOTQ (3 - coldqidx) /* XXX */
136 #endif /* defined(LISTQ) */
137 #define CLOCKPRO_LISTQ 4
138 #define CLOCKPRO_NQUEUE 4
139
140 static inline void
141 clockpro_setq(struct vm_page *pg, int qidx)
142 {
143 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
144 KASSERT(qidx <= CLOCKPRO_NQUEUE);
145
146 pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
147 }
148
149 static inline int
150 clockpro_getq(struct vm_page *pg)
151 {
152 int qidx;
153
154 qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
155 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
156 KASSERT(qidx <= CLOCKPRO_NQUEUE);
157 return qidx;
158 }
159
/*
 * pageq_t: a page list together with its length.
 * q_len is maintained by the pageq_insert_*() / pageq_remove() helpers.
 */
160 typedef struct {
161 struct pglist q_q;
162 int q_len;
163 } pageq_t;
164
/*
 * global state of the policy.
 *
 * s_npages:     pages currently enqueued (incremented in
 *               clockpro_pageenqueue(), decremented in
 *               clockpro_pagedequeue()).
 * s_coldtarget: desired value of s_ncold, set by clockpro_tune().
 * s_ncold:      enqueued pages that do not have PQ_HOT set.
 * s_newqlenmax: length that clockpro_newqrotate() trims the new queue to.
 * s_q:          the queues; queue index qidx lives in s_q[qidx - 1].
 */
165 struct clockpro_state {
166 int s_npages;
167 int s_coldtarget;
168 int s_ncold;
169
170 int s_newqlenmax;
171 pageq_t s_q[CLOCKPRO_NQUEUE];
172 };
173
174 static pageq_t *
175 clockpro_queue(struct clockpro_state *s, int qidx)
176 {
177
178 KASSERT(CLOCKPRO_NOQUEUE < qidx);
179 KASSERT(qidx <= CLOCKPRO_NQUEUE);
180
181 return &s->s_q[qidx - 1];
182 }
183
184 #if !defined(LISTQ)
185
/*
 * coldqidx: selects which of the two physical queues currently plays the
 * cold role; CLOCKPRO_COLDQ and CLOCKPRO_HOTQ are computed from it.
 */
static int coldqidx;

/*
 * clockpro_switchqueue: exchange the roles of the hot and cold queues
 * by flipping coldqidx between 0 and 1.
 */
static void
clockpro_switchqueue(void)
{
	const int flipped = 1 - coldqidx;

	coldqidx = flipped;
}
194
195 #endif /* !defined(LISTQ) */
196
/* the single, file-wide instance of the policy state. */
197 static struct clockpro_state clockpro;
/*
 * per-scan state: ss_nscanned is reset by uvmpdpol_scaninit() and
 * incremented by every uvmpdpol_selectvictim() call that runs the cold hand.
 */
198 static struct clockpro_scanstate {
199 int ss_nscanned;
200 } scanstate;
201
202 /* ---------------------------------------- */
203
204 static void
205 pageq_init(pageq_t *q)
206 {
207
208 TAILQ_INIT(&q->q_q);
209 q->q_len = 0;
210 }
211
212 static int
213 pageq_len(const pageq_t *q)
214 {
215
216 return q->q_len;
217 }
218
219 static struct vm_page *
220 pageq_first(const pageq_t *q)
221 {
222
223 return TAILQ_FIRST(&q->q_q);
224 }
225
226 static void
227 pageq_insert_tail(pageq_t *q, struct vm_page *pg)
228 {
229
230 TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
231 q->q_len++;
232 }
233
234 static void
235 pageq_insert_head(pageq_t *q, struct vm_page *pg)
236 {
237
238 TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
239 q->q_len++;
240 }
241
242 static void
243 pageq_remove(pageq_t *q, struct vm_page *pg)
244 {
245
246 #if 1
247 KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
248 #endif
249 KASSERT(q->q_len > 0);
250 TAILQ_REMOVE(&q->q_q, pg, pageq);
251 q->q_len--;
252 }
253
254 static struct vm_page *
255 pageq_remove_head(pageq_t *q)
256 {
257 struct vm_page *pg;
258
259 pg = TAILQ_FIRST(&q->q_q);
260 if (pg == NULL) {
261 KASSERT(q->q_len == 0);
262 return NULL;
263 }
264 pageq_remove(q, pg);
265 return pg;
266 }
267
268 /* ---------------------------------------- */
269
270 static void
271 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
272 {
273 pageq_t *q = clockpro_queue(s, qidx);
274
275 clockpro_setq(pg, qidx);
276 pageq_insert_tail(q, pg);
277 }
278
279 static void
280 clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
281 {
282 pageq_t *q = clockpro_queue(s, qidx);
283
284 clockpro_setq(pg, qidx);
285 pageq_insert_head(q, pg);
286 }
287
288 /* ---------------------------------------- */
289
/*
 * nonres_cookie_t: 32-bit fingerprint of a page identity, stored in the
 * non-resident hash buckets.  NONRES_COOKIE_INVAL marks an empty slot;
 * calccookie() never returns it.
 */
290 typedef uint32_t nonres_cookie_t;
291 #define NONRES_COOKIE_INVAL 0
292
/* objid_t: integer form of the owning object/anon pointer (see pageobj()). */
293 typedef uintptr_t objid_t;
294
295 /*
296 * XXX maybe these hash functions need reconsideration,
297 * given that hash distribution is critical here.
298 */
299
300 static uint32_t
301 pageidentityhash1(objid_t obj, off_t idx)
302 {
303 uint32_t hash = HASH32_BUF_INIT;
304
305 #if 1
306 hash = hash32_buf(&idx, sizeof(idx), hash);
307 hash = hash32_buf(&obj, sizeof(obj), hash);
308 #else
309 hash = hash32_buf(&obj, sizeof(obj), hash);
310 hash = hash32_buf(&idx, sizeof(idx), hash);
311 #endif
312 return hash;
313 }
314
315 static uint32_t
316 pageidentityhash2(objid_t obj, off_t idx)
317 {
318 uint32_t hash = HASH32_BUF_INIT;
319
320 hash = hash32_buf(&obj, sizeof(obj), hash);
321 hash = hash32_buf(&idx, sizeof(idx), hash);
322 return hash;
323 }
324
325 static nonres_cookie_t
326 calccookie(objid_t obj, off_t idx)
327 {
328 uint32_t hash = pageidentityhash2(obj, idx);
329 nonres_cookie_t cookie = hash;
330
331 if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
332 cookie++; /* XXX */
333 }
334 return cookie;
335 }
336
/*
 * non-resident page hash.
 *
 * each bucket is a small ring of cookies: "cur" is the slot that will be
 * written (or expired) next, and "cycle" counts how many slots have been
 * expired by nonresident_rotate(), which advances it until it catches up
 * with the global cycle_target.
 */
337 #define BUCKETSIZE 16
338 struct bucket {
339 int cycle;
340 int cur;
341 nonres_cookie_t pages[BUCKETSIZE];
342 };
/*
 * cycle_target is advanced by handhot_advance(); cycle_target_frac holds
 * the remainder of that computation between calls.
 */
343 static int cycle_target;
344 static int cycle_target_frac;
345
/* the hash table and its bucket count, installed by clockpro_hashinit(). */
346 static struct bucket *buckets;
347 static size_t hashsize;
348
/*
 * accumulated adjustment to the cold target; only consumed by
 * clockpro_tune() when ADAPTIVE is defined.
 */
349 static int coldadj;
350 #define COLDTARGET_ADJ(d) coldadj += (d)
351
352 #if defined(PDSIM)
353
354 static void *
355 clockpro_hashalloc(int n)
356 {
357 size_t allocsz = sizeof(*buckets) * n;
358
359 return malloc(allocsz);
360 }
361
/*
 * clockpro_hashfree: release a table obtained from clockpro_hashalloc()
 * (simulator version).  the bucket count is not needed here.
 */
static void
clockpro_hashfree(void *p, int n)
{

	(void)n;
	free(p);
}
368
369 #else /* defined(PDSIM) */
370
371 static void *
372 clockpro_hashalloc(int n)
373 {
374 size_t allocsz = round_page(sizeof(*buckets) * n);
375
376 return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
377 }
378
379 static void
380 clockpro_hashfree(void *p, int n)
381 {
382 size_t allocsz = round_page(sizeof(*buckets) * n);
383
384 uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
385 }
386
387 #endif /* defined(PDSIM) */
388
389 static void
390 clockpro_hashinit(uint64_t n)
391 {
392 struct bucket *newbuckets;
393 struct bucket *oldbuckets;
394 size_t sz;
395 size_t oldsz;
396 int i;
397
398 sz = howmany(n, BUCKETSIZE);
399 sz *= clockpro_hashfactor;
400 newbuckets = clockpro_hashalloc(sz);
401 if (newbuckets == NULL) {
402 panic("%s: allocation failure", __func__);
403 }
404 for (i = 0; i < sz; i++) {
405 struct bucket *b = &newbuckets[i];
406 int j;
407
408 b->cycle = cycle_target;
409 b->cur = 0;
410 for (j = 0; j < BUCKETSIZE; j++) {
411 b->pages[j] = NONRES_COOKIE_INVAL;
412 }
413 }
414 /* XXX lock */
415 oldbuckets = buckets;
416 oldsz = hashsize;
417 buckets = newbuckets;
418 hashsize = sz;
419 /* XXX unlock */
420 if (oldbuckets) {
421 clockpro_hashfree(oldbuckets, oldsz);
422 }
423 }
424
425 static struct bucket *
426 nonresident_getbucket(objid_t obj, off_t idx)
427 {
428 uint32_t hash;
429 static struct bucket static_bucket;
430
431 if (hashsize == 0) {
432 return &static_bucket;
433 }
434
435 hash = pageidentityhash1(obj, idx);
436 return &buckets[hash % hashsize];
437 }
438
439 static void
440 nonresident_rotate(struct bucket *b)
441 {
442
443 while (b->cycle - cycle_target < 0) {
444 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
445 PDPOL_EVCNT_INCR(nreshandhot);
446 COLDTARGET_ADJ(-1);
447 }
448 b->pages[b->cur] = NONRES_COOKIE_INVAL;
449 b->cur = (b->cur + 1) % BUCKETSIZE;
450 b->cycle++;
451 }
452 }
453
454 static boolean_t
455 nonresident_lookupremove(objid_t obj, off_t idx)
456 {
457 struct bucket *b = nonresident_getbucket(obj, idx);
458 nonres_cookie_t cookie = calccookie(obj, idx);
459 int i;
460
461 nonresident_rotate(b);
462 for (i = 0; i < BUCKETSIZE; i++) {
463 if (b->pages[i] == cookie) {
464 b->pages[i] = NONRES_COOKIE_INVAL;
465 return TRUE;
466 }
467 }
468 return FALSE;
469 }
470
471 static objid_t
472 pageobj(struct vm_page *pg)
473 {
474 const void *obj;
475
476 /*
477 * XXX object pointer is often freed and reused for unrelated object.
478 * for vnodes, it would be better to use something like
479 * a hash of fsid/fileid/generation.
480 */
481
482 obj = pg->uobject;
483 if (obj == NULL) {
484 obj = pg->uanon;
485 KASSERT(obj != NULL);
486 KASSERT(pg->offset == 0);
487 }
488
489 return (objid_t)obj;
490 }
491
492 static off_t
493 pageidx(struct vm_page *pg)
494 {
495
496 KASSERT((pg->offset & PAGE_MASK) == 0);
497 return pg->offset >> PAGE_SHIFT;
498 }
499
500 static boolean_t
501 nonresident_pagelookupremove(struct vm_page *pg)
502 {
503 boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));
504
505 PDPOL_EVCNT_INCR(nreslookup);
506 if (found) {
507 if (pg->uobject) {
508 PDPOL_EVCNT_INCR(nresfoundobj);
509 } else {
510 PDPOL_EVCNT_INCR(nresfoundanon);
511 }
512 }
513 return found;
514 }
515
516 static void
517 nonresident_pagerecord(struct vm_page *pg)
518 {
519 objid_t obj = pageobj(pg);
520 off_t idx = pageidx(pg);
521 struct bucket *b = nonresident_getbucket(obj, idx);
522 nonres_cookie_t cookie = calccookie(obj, idx);
523
524 PDPOL_EVCNT_INCR(nresrecord);
525
526 #if defined(DEBUG)
527 int i;
528
529 for (i = 0; i < BUCKETSIZE; i++) {
530 if (b->pages[i] == cookie) {
531 PDPOL_EVCNT_INCR(nresconflict);
532 }
533 }
534 #endif /* defined(DEBUG) */
535 nonresident_rotate(b);
536 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
537 PDPOL_EVCNT_INCR(nresoverwritten);
538 COLDTARGET_ADJ(-1);
539 }
540 b->pages[b->cur] = cookie;
541 b->cur = (b->cur + 1) % BUCKETSIZE;
542 }
543
544 /* ---------------------------------------- */
545
/*
 * check_sanity: hook called around queue manipulations.
 *
 * with CLOCKPRO_DEBUG it is a real function whose body is currently
 * empty; otherwise it is a macro that expands to nothing.
 */
546 #if defined(CLOCKPRO_DEBUG)
547 static void
548 check_sanity(void)
549 {
550 }
551 #else /* defined(CLOCKPRO_DEBUG) */
552 #define check_sanity() /* nothing */
553 #endif /* defined(CLOCKPRO_DEBUG) */
554
555 static void
556 clockpro_reinit(void)
557 {
558
559 clockpro_hashinit(uvmexp.npages);
560 }
561
562 static void
563 clockpro_init(void)
564 {
565 struct clockpro_state *s = &clockpro;
566 int i;
567
568 for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
569 pageq_init(&s->s_q[i]);
570 }
571 s->s_newqlenmax = 1;
572 s->s_coldtarget = 1;
573 }
574
575 static void
576 clockpro_tune(void)
577 {
578 struct clockpro_state *s = &clockpro;
579 int coldtarget;
580
581 #if defined(ADAPTIVE)
582 int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
583 int coldmin = 1;
584
585 coldtarget = s->s_coldtarget;
586 if (coldtarget + coldadj < coldmin) {
587 coldadj = coldmin - coldtarget;
588 } else if (coldtarget + coldadj > coldmax) {
589 coldadj = coldmax - coldtarget;
590 }
591 coldtarget += coldadj;
592 #else /* defined(ADAPTIVE) */
593 coldtarget = s->s_npages * CLOCKPRO_COLDPCT / 100;
594 if (coldtarget < 1) {
595 coldtarget = 1;
596 }
597 #endif /* defined(ADAPTIVE) */
598
599 s->s_coldtarget = coldtarget;
600 s->s_newqlenmax = coldtarget / 4;
601 if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
602 s->s_newqlenmax = CLOCKPRO_NEWQMIN;
603 }
604 }
605
606 static void
607 clockpro_movereferencebit(struct vm_page *pg)
608 {
609 boolean_t referenced;
610
611 referenced = pmap_clear_reference(pg);
612 if (referenced) {
613 pg->pqflags |= PQ_REFERENCED;
614 }
615 }
616
617 static void
618 clockpro_clearreferencebit(struct vm_page *pg)
619 {
620
621 clockpro_movereferencebit(pg);
622 pg->pqflags &= ~PQ_REFERENCED;
623 }
624
625 static void
626 clockpro___newqrotate(int len)
627 {
628 struct clockpro_state * const s = &clockpro;
629 pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
630 struct vm_page *pg;
631
632 while (pageq_len(newq) > len) {
633 pg = pageq_remove_head(newq);
634 KASSERT(pg != NULL);
635 KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
636 if ((pg->pqflags & PQ_INITIALREF) != 0) {
637 clockpro_clearreferencebit(pg);
638 pg->pqflags &= ~PQ_INITIALREF;
639 }
640 /* place at the list head */
641 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
642 }
643 }
644
645 static void
646 clockpro_newqrotate(void)
647 {
648 struct clockpro_state * const s = &clockpro;
649
650 check_sanity();
651 clockpro___newqrotate(s->s_newqlenmax);
652 check_sanity();
653 }
654
/*
 * clockpro_newqflush: move pages out of the new queue until at most "n"
 * remain, regardless of s_newqlenmax.
 */
static void
clockpro_newqflush(int n)
{
	const int keep = n;

	check_sanity();
	clockpro___newqrotate(keep);
	check_sanity();
}
663
664 static void
665 clockpro_newqflushone(void)
666 {
667 struct clockpro_state * const s = &clockpro;
668
669 clockpro_newqflush(
670 MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
671 }
672
673 /*
674 * our "tail" is called "list-head" in the paper.
675 */
676
677 static void
678 clockpro___enqueuetail(struct vm_page *pg)
679 {
680 struct clockpro_state * const s = &clockpro;
681
682 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
683
684 check_sanity();
685 #if !defined(USEONCE2)
686 clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
687 clockpro_newqrotate();
688 #else /* !defined(USEONCE2) */
689 #if defined(LISTQ)
690 KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
691 #endif /* defined(LISTQ) */
692 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
693 #endif /* !defined(USEONCE2) */
694 check_sanity();
695 }
696
697 static void
698 clockpro_pageenqueue(struct vm_page *pg)
699 {
700 struct clockpro_state * const s = &clockpro;
701 boolean_t hot;
702 boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */
703
704 KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
705 UVM_LOCK_ASSERT_PAGEQ();
706 check_sanity();
707 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
708 s->s_npages++;
709 pg->pqflags &= ~(PQ_HOT|PQ_TEST);
710 if (speculative) {
711 hot = FALSE;
712 PDPOL_EVCNT_INCR(speculativeenqueue);
713 } else {
714 hot = nonresident_pagelookupremove(pg);
715 if (hot) {
716 COLDTARGET_ADJ(1);
717 }
718 }
719
720 /*
721 * consider mmap'ed file:
722 *
723 * - read-ahead enqueues a page.
724 *
725 * - on the following read-ahead hit, the fault handler activates it.
726 *
727 * - finally, the userland code which caused the above fault
728 * actually accesses the page. it makes its reference bit set.
729 *
730 * we want to count the above as a single access, rather than
731 * three accesses with short reuse distances.
732 */
733
734 #if defined(USEONCE2)
735 pg->pqflags &= ~PQ_INITIALREF;
736 if (hot) {
737 pg->pqflags |= PQ_TEST;
738 }
739 s->s_ncold++;
740 clockpro_clearreferencebit(pg);
741 clockpro___enqueuetail(pg);
742 #else /* defined(USEONCE2) */
743 if (speculative) {
744 s->s_ncold++;
745 } else if (hot) {
746 pg->pqflags |= PQ_HOT;
747 } else {
748 pg->pqflags |= PQ_TEST;
749 s->s_ncold++;
750 }
751 clockpro___enqueuetail(pg);
752 #endif /* defined(USEONCE2) */
753 KASSERT(s->s_ncold <= s->s_npages);
754 }
755
756 static pageq_t *
757 clockpro_pagequeue(struct vm_page *pg)
758 {
759 struct clockpro_state * const s = &clockpro;
760 int qidx;
761
762 qidx = clockpro_getq(pg);
763 KASSERT(qidx != CLOCKPRO_NOQUEUE);
764
765 return clockpro_queue(s, qidx);
766 }
767
768 static void
769 clockpro_pagedequeue(struct vm_page *pg)
770 {
771 struct clockpro_state * const s = &clockpro;
772 pageq_t *q;
773
774 KASSERT(s->s_npages > 0);
775 check_sanity();
776 q = clockpro_pagequeue(pg);
777 pageq_remove(q, pg);
778 check_sanity();
779 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
780 if ((pg->pqflags & PQ_HOT) == 0) {
781 KASSERT(s->s_ncold > 0);
782 s->s_ncold--;
783 }
784 KASSERT(s->s_npages > 0);
785 s->s_npages--;
786 check_sanity();
787 }
788
789 static void
790 clockpro_pagerequeue(struct vm_page *pg)
791 {
792 struct clockpro_state * const s = &clockpro;
793 int qidx;
794
795 qidx = clockpro_getq(pg);
796 KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
797 pageq_remove(clockpro_queue(s, qidx), pg);
798 check_sanity();
799 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
800
801 clockpro___enqueuetail(pg);
802 }
803
804 static void
805 handhot_endtest(struct vm_page *pg)
806 {
807
808 KASSERT((pg->pqflags & PQ_HOT) == 0);
809 if ((pg->pqflags & PQ_TEST) != 0) {
810 PDPOL_EVCNT_INCR(hhotcoldtest);
811 COLDTARGET_ADJ(-1);
812 pg->pqflags &= ~PQ_TEST;
813 } else {
814 PDPOL_EVCNT_INCR(hhotcold);
815 }
816 }
817
/*
 * handhot_advance: run the hot hand.
 *
 * after re-tuning the targets, pages are examined one at a time from the
 * front of the hot queue until s_ncold reaches s_coldtarget.  a hot page
 * whose reference state is clear is demoted to cold; every examined page
 * is then requeued through clockpro_pagerequeue().
 *
 * each examined page also advances cycle_target, the value that
 * nonresident_rotate() compares bucket cycles against.
 */
818 static void
819 handhot_advance(void)
820 {
821 struct clockpro_state * const s = &clockpro;
822 struct vm_page *pg;
823 pageq_t *hotq;
824 int hotqlen;
825
826 clockpro_tune();
827
828 dump("hot called");
/* enough cold pages already; nothing to do. */
829 if (s->s_ncold >= s->s_coldtarget) {
830 return;
831 }
832 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
833 again:
834 pg = pageq_first(hotq);
835 if (pg == NULL) {
/* the hot queue is empty: refill it from the cold side. */
836 DPRINTF("%s: HHOT TAKEOVER\n", __func__);
837 dump("hhottakeover");
838 PDPOL_EVCNT_INCR(hhottakeover);
839 #if defined(LISTQ)
/*
 * walk the cold queue from its front.  cold pages end their test
 * period and are parked on the list queue; the first hot page found
 * becomes the head of the hot queue and ends the walk.
 */
840 while (/* CONSTCOND */ 1) {
841 pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
842
843 pg = pageq_first(coldq);
844 if (pg == NULL) {
/* cold queue exhausted: pull one page over from the new queue. */
845 clockpro_newqflushone();
846 pg = pageq_first(coldq);
847 if (pg == NULL) {
848 WARN("hhot: no page?\n");
849 return;
850 }
851 }
852 KASSERT(clockpro_pagequeue(pg) == coldq);
853 pageq_remove(coldq, pg);
854 check_sanity();
855 if ((pg->pqflags & PQ_HOT) == 0) {
856 handhot_endtest(pg);
857 clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
858 } else {
859 clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
860 break;
861 }
862 }
863 #else /* defined(LISTQ) */
/* empty the new queue, swap the hot/cold roles and retry. */
864 clockpro_newqflush(0); /* XXX XXX */
865 clockpro_switchqueue();
866 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
867 goto again;
868 #endif /* defined(LISTQ) */
869 }
870
871 KASSERT(clockpro_pagequeue(pg) == hotq);
872
/*
 * advance cycle_target by BUCKETSIZE / hotqlen per examined page;
 * cycle_target_frac carries the remainder between pages.
 */
873 cycle_target_frac += BUCKETSIZE;
874 hotqlen = pageq_len(hotq);
875 while (cycle_target_frac >= hotqlen) {
876 cycle_target++;
877 cycle_target_frac -= hotqlen;
878 }
879
880 if ((pg->pqflags & PQ_HOT) == 0) {
881 #if defined(LISTQ)
/* with LISTQ the hot queue must contain hot pages only. */
882 panic("cold page in hotq: %p", pg);
883 #else /* defined(LISTQ) */
884 handhot_endtest(pg);
885 goto next;
886 #endif /* defined(LISTQ) */
887 }
888 KASSERT((pg->pqflags & PQ_TEST) == 0);
889 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
890 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
/* stop as soon as the cold target has been reached. */
891 if (s->s_ncold >= s->s_coldtarget) {
892 dump("hot done");
893 return;
894 }
895 clockpro_movereferencebit(pg);
896 if ((pg->pqflags & PQ_REFERENCED) == 0) {
/* not referenced since the last visit: demote to cold. */
897 PDPOL_EVCNT_INCR(hhotunref);
898 uvmexp.pddeact++;
899 pg->pqflags &= ~PQ_HOT;
900 clockpro.s_ncold++;
901 KASSERT(s->s_ncold <= s->s_npages);
902 } else {
903 PDPOL_EVCNT_INCR(hhotref);
904 }
/* the reference has been consumed either way. */
905 pg->pqflags &= ~PQ_REFERENCED;
906 #if !defined(LISTQ)
907 next:
908 #endif /* !defined(LISTQ) */
909 clockpro_pagerequeue(pg);
910 dump("hot");
911 goto again;
912 }
913
/*
 * handcold_advance: run the cold hand and pick a victim.
 *
 * returns a cold page that was not referenced, or NULL if no page could
 * be found at all.  the returned page has been requeued and is still on
 * a queue.
 *
 * pages met on the way are handled as follows:
 * - hot page on the cold queue: moved to the tail of the hot queue.
 * - speculative page that was referenced: re-enqueued as a normal page.
 * - referenced page in its test period: promoted to hot.
 * - referenced page not in its test period: enters its test period.
 * - unreferenced page in its test period: recorded in the non-resident
 *   hash and returned.
 */
914 static struct vm_page *
915 handcold_advance(void)
916 {
917 struct clockpro_state * const s = &clockpro;
918 struct vm_page *pg;
919
920 for (;;) {
921 pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
922 pageq_t *coldq;
923
/* keep the new queue within bounds and let the hot hand catch up. */
924 clockpro_newqrotate();
925 handhot_advance();
926 #if defined(LISTQ)
/*
 * pages parked on the list queue by the hot hand are taken first;
 * such a page is moved to the front of the cold queue and examined.
 */
927 pg = pageq_first(listq);
928 if (pg != NULL) {
929 KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
930 KASSERT((pg->pqflags & PQ_TEST) == 0);
931 KASSERT((pg->pqflags & PQ_HOT) == 0);
932 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
933 pageq_remove(listq, pg);
934 check_sanity();
935 clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
936 goto gotcold;
937 }
938 #endif /* defined(LISTQ) */
939 check_sanity();
940 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
941 pg = pageq_first(coldq);
942 if (pg == NULL) {
/* cold queue empty: pull one page over from the new queue. */
943 clockpro_newqflushone();
944 pg = pageq_first(coldq);
945 }
946 if (pg == NULL) {
947 DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
948 dump("hcoldtakeover");
949 PDPOL_EVCNT_INCR(hcoldtakeover);
950 KASSERT(
951 pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
952 #if defined(LISTQ)
953 KASSERT(
954 pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
955 #else /* defined(LISTQ) */
/* swap the hot/cold roles and look at the new cold queue. */
956 clockpro_switchqueue();
957 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
958 pg = pageq_first(coldq);
959 #endif /* defined(LISTQ) */
960 }
961 if (pg == NULL) {
962 WARN("hcold: no page?\n");
963 return NULL;
964 }
965 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
966 if ((pg->pqflags & PQ_HOT) != 0) {
/* a hot page on the cold queue: hand it over to the hot queue. */
967 pageq_remove(coldq, pg);
968 clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
969 check_sanity();
970 KASSERT((pg->pqflags & PQ_TEST) == 0);
971 uvmexp.pdscans++;
972 continue;
973 }
974 #if defined(LISTQ)
975 gotcold:
976 #endif /* defined(LISTQ) */
977 KASSERT((pg->pqflags & PQ_HOT) == 0);
978 uvmexp.pdscans++;
979 clockpro_movereferencebit(pg);
980 if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
981 KASSERT((pg->pqflags & PQ_TEST) == 0);
982 if ((pg->pqflags & PQ_REFERENCED) != 0) {
/*
 * the speculative page was used after all: drop the speculative
 * marking and enqueue it afresh as a normal page.
 */
983 PDPOL_EVCNT_INCR(speculativehit2);
984 pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
985 clockpro_pagedequeue(pg);
986 clockpro_pageenqueue(pg);
987 continue;
988 }
989 PDPOL_EVCNT_INCR(speculativemiss);
990 }
991 switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
992 case PQ_TEST:
/* unreferenced, in test: remember it as non-resident, then evict. */
993 PDPOL_EVCNT_INCR(hcoldunreftest);
994 nonresident_pagerecord(pg);
995 goto gotit;
996 case 0:
/* unreferenced, not in test: evict. */
997 PDPOL_EVCNT_INCR(hcoldunref);
998 gotit:
999 KASSERT(s->s_ncold > 0);
1000 clockpro_pagerequeue(pg); /* XXX */
1001 dump("cold done");
1002 /* XXX "pg" is still in queue */
1003 handhot_advance();
1004 goto done;
1005
1006 case PQ_REFERENCED|PQ_TEST:
/* referenced during its test period: promote to hot. */
1007 PDPOL_EVCNT_INCR(hcoldreftest);
1008 s->s_ncold--;
1009 COLDTARGET_ADJ(1);
1010 pg->pqflags |= PQ_HOT;
1011 pg->pqflags &= ~PQ_TEST;
1012 break;
1013
1014 case PQ_REFERENCED:
/* referenced outside a test period: start one. */
1015 PDPOL_EVCNT_INCR(hcoldref);
1016 pg->pqflags |= PQ_TEST;
1017 break;
1018 }
/* the page survives this round: consume the reference and requeue. */
1019 pg->pqflags &= ~PQ_REFERENCED;
1020 uvmexp.pdreact++;
1021 /* move to the list head */
1022 clockpro_pagerequeue(pg);
1023 dump("cold");
1024 }
1025 done:;
1026 return pg;
1027 }
1028
1029 void
1030 uvmpdpol_pageactivate(struct vm_page *pg)
1031 {
1032
1033 if (!uvmpdpol_pageisqueued_p(pg)) {
1034 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
1035 pg->pqflags |= PQ_INITIALREF;
1036 clockpro_pageenqueue(pg);
1037 } else if ((pg->pqflags & PQ_SPECULATIVE)) {
1038 PDPOL_EVCNT_INCR(speculativehit1);
1039 pg->pqflags &= ~PQ_SPECULATIVE;
1040 pg->pqflags |= PQ_INITIALREF;
1041 clockpro_pagedequeue(pg);
1042 clockpro_pageenqueue(pg);
1043 }
1044 pg->pqflags |= PQ_REFERENCED;
1045 }
1046
1047 void
1048 uvmpdpol_pagedeactivate(struct vm_page *pg)
1049 {
1050
1051 pg->pqflags &= ~PQ_REFERENCED;
1052 }
1053
1054 void
1055 uvmpdpol_pagedequeue(struct vm_page *pg)
1056 {
1057
1058 if (!uvmpdpol_pageisqueued_p(pg)) {
1059 return;
1060 }
1061 clockpro_pagedequeue(pg);
1062 pg->pqflags &= ~PQ_SPECULATIVE;
1063 }
1064
1065 void
1066 uvmpdpol_pageenqueue(struct vm_page *pg)
1067 {
1068
1069 #if 1
1070 if (uvmpdpol_pageisqueued_p(pg)) {
1071 return;
1072 }
1073 clockpro_clearreferencebit(pg);
1074 pg->pqflags |= PQ_SPECULATIVE;
1075 clockpro_pageenqueue(pg);
1076 #else
1077 uvmpdpol_pageactivate(pg);
1078 #endif
1079 }
1080
1081 void
1082 uvmpdpol_anfree(struct vm_anon *an)
1083 {
1084
1085 KASSERT(an->an_page == NULL);
1086 if (nonresident_lookupremove((objid_t)an, 0)) {
1087 PDPOL_EVCNT_INCR(nresanonfree);
1088 }
1089 }
1090
/*
 * uvmpdpol_init: policy entry point for one-time initialization;
 * forwards to clockpro_init().
 */
void
uvmpdpol_init(void)
{
	clockpro_init();
}
1097
/*
 * uvmpdpol_reinit: policy entry point for re-initialization;
 * forwards to clockpro_reinit(), which rebuilds the non-resident hash.
 */
void
uvmpdpol_reinit(void)
{
	clockpro_reinit();
}
1104
1105 void
1106 uvmpdpol_estimatepageable(int *active, int *inactive)
1107 {
1108 struct clockpro_state * const s = &clockpro;
1109
1110 if (active) {
1111 *active = s->s_npages - s->s_ncold;
1112 }
1113 if (inactive) {
1114 *inactive = s->s_ncold;
1115 }
1116 }
1117
1118 boolean_t
1119 uvmpdpol_pageisqueued_p(struct vm_page *pg)
1120 {
1121
1122 return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
1123 }
1124
1125 void
1126 uvmpdpol_scaninit(void)
1127 {
1128 struct clockpro_scanstate * const ss = &scanstate;
1129
1130 ss->ss_nscanned = 0;
1131 }
1132
1133 struct vm_page *
1134 uvmpdpol_selectvictim(void)
1135 {
1136 struct clockpro_state * const s = &clockpro;
1137 struct clockpro_scanstate * const ss = &scanstate;
1138 struct vm_page *pg;
1139
1140 if (ss->ss_nscanned > s->s_npages) {
1141 DPRINTF("scan too much\n");
1142 return NULL;
1143 }
1144 pg = handcold_advance();
1145 ss->ss_nscanned++;
1146 return pg;
1147 }
1148
1149 static void
1150 clockpro_dropswap(pageq_t *q, int *todo)
1151 {
1152 struct vm_page *pg;
1153
1154 TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
1155 if (*todo <= 0) {
1156 break;
1157 }
1158 if ((pg->pqflags & PQ_HOT) == 0) {
1159 continue;
1160 }
1161 if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
1162 continue;
1163 }
1164 if (uvmpd_trydropswap(pg)) {
1165 (*todo)--;
1166 }
1167 }
1168 }
1169
1170 void
1171 uvmpdpol_balancequeue(int swap_shortage)
1172 {
1173 struct clockpro_state * const s = &clockpro;
1174 int todo = swap_shortage;
1175
1176 if (todo == 0) {
1177 return;
1178 }
1179
1180 /*
1181 * reclaim swap slots from hot pages
1182 */
1183
1184 DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);
1185
1186 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
1187 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
1188 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
1189
1190 DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
1191 }
1192
1193 boolean_t
1194 uvmpdpol_needsscan_p(void)
1195 {
1196 struct clockpro_state * const s = &clockpro;
1197
1198 if (s->s_ncold < s->s_coldtarget) {
1199 return TRUE;
1200 }
1201 return FALSE;
1202 }
1203
/*
 * uvmpdpol_tune: policy entry point for recomputing the targets;
 * forwards to clockpro_tune().
 */
void
uvmpdpol_tune(void)
{
	clockpro_tune();
}
1210
/*
 * uvmpdpol_sysctlsetup: this policy registers no sysctl nodes.
 */
void
uvmpdpol_sysctlsetup(void)
{

	/* nothing */
}
1215
1216 #if defined(DDB)
1217
1218 void clockpro_dump(void);
1219
1220 void
1221 clockpro_dump(void)
1222 {
1223 struct clockpro_state * const s = &clockpro;
1224
1225 struct vm_page *pg;
1226 int ncold, nhot, ntest, nspeculative, ninitialref, nref;
1227 int newqlen, coldqlen, hotqlen, listqlen;
1228
1229 newqlen = coldqlen = hotqlen = listqlen = 0;
1230 printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
1231 s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);
1232
1233 #define INITCOUNT() \
1234 ncold = nhot = ntest = nspeculative = ninitialref = nref = 0
1235
1236 #define COUNT(pg) \
1237 if ((pg->pqflags & PQ_HOT) != 0) { \
1238 nhot++; \
1239 } else { \
1240 ncold++; \
1241 if ((pg->pqflags & PQ_TEST) != 0) { \
1242 ntest++; \
1243 } \
1244 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
1245 nspeculative++; \
1246 } \
1247 if ((pg->pqflags & PQ_INITIALREF) != 0) { \
1248 ninitialref++; \
1249 } else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
1250 pmap_is_referenced(pg)) { \
1251 nref++; \
1252 } \
1253 }
1254
1255 #define PRINTCOUNT(name) \
1256 printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
1257 "nref=%d\n", \
1258 (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)
1259
1260 INITCOUNT();
1261 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
1262 if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
1263 printf("newq corrupt %p\n", pg);
1264 }
1265 COUNT(pg)
1266 newqlen++;
1267 }
1268 PRINTCOUNT("newq");
1269
1270 INITCOUNT();
1271 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
1272 if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
1273 printf("coldq corrupt %p\n", pg);
1274 }
1275 COUNT(pg)
1276 coldqlen++;
1277 }
1278 PRINTCOUNT("coldq");
1279
1280 INITCOUNT();
1281 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
1282 if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
1283 printf("hotq corrupt %p\n", pg);
1284 }
1285 #if defined(LISTQ)
1286 if ((pg->pqflags & PQ_HOT) == 0) {
1287 printf("cold page in hotq: %p\n", pg);
1288 }
1289 #endif /* defined(LISTQ) */
1290 COUNT(pg)
1291 hotqlen++;
1292 }
1293 PRINTCOUNT("hotq");
1294
1295 INITCOUNT();
1296 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
1297 #if !defined(LISTQ)
1298 printf("listq %p\n");
1299 #endif /* !defined(LISTQ) */
1300 if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
1301 printf("listq corrupt %p\n", pg);
1302 }
1303 COUNT(pg)
1304 listqlen++;
1305 }
1306 PRINTCOUNT("listq");
1307
1308 printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
1309 newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
1310 coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
1311 hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
1312 listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
1313 }
1314
1315 #endif /* defined(DDB) */
1316
1317 #if defined(PDSIM)
1318 static void
1319 pdsim_dumpq(int qidx)
1320 {
1321 struct clockpro_state * const s = &clockpro;
1322 pageq_t *q = clockpro_queue(s, qidx);
1323 struct vm_page *pg;
1324
1325 TAILQ_FOREACH(pg, &q->q_q, pageq) {
1326 DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
1327 pg->offset >> PAGE_SHIFT,
1328 (pg->pqflags & PQ_HOT) ? "H" : "",
1329 (pg->pqflags & PQ_TEST) ? "T" : "",
1330 (pg->pqflags & PQ_REFERENCED) ? "R" : "",
1331 pmap_is_referenced(pg) ? "r" : "",
1332 (pg->pqflags & PQ_INITIALREF) ? "I" : "",
1333 (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
1334 );
1335 }
1336 }
1337
/*
 * pdsim_dump: emit a one-line snapshot of all queues, tagged with "id"
 * (simulator only).
 *
 * the queues appear in the order list, hot, cold, new, followed by the
 * cold page count, the cold target and the pending adjustment.
 * produces no output unless DEBUG is defined.
 */
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	const struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);

	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);

	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);

	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);

	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
1356 #endif /* defined(PDSIM) */
1357