/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.9 2006/03/24 13:48:10 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */
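
/*
 * rough picture of how this file realizes the above (see the code for
 * the authoritative details): resident pages sit on a small "new" queue
 * that absorbs initial references, a cold queue, a hot queue and, with
 * LISTQ, a list queue; evicted pages are remembered only as hashed
 * cookies in the non-resident buckets below, so that a quick re-fault
 * can be detected and the page promoted to hot.
 */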

/* #define CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.9 2006/03/24 13:48:10 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define DPRINTF(...) /* nothing */
#define WARN(...) printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define DPRINTF(a...) /* nothing */ /* GCC */
#define WARN(a...) printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define dump(a) /* nothing */

#undef USEONCE2
#define LISTQ
#undef ADAPTIVE

#endif /* defined(PDSIM) */

#if !defined(CLOCKPRO_COLDPCT)
#define CLOCKPRO_COLDPCT 10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define CLOCKPRO_COLDPCTMAX 90

#if !defined(CLOCKPRO_HASHFACTOR)
#define CLOCKPRO_HASHFACTOR 2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define CLOCKPRO_NEWQMIN ((1024 * 1024) >> PAGE_SHIFT) /* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookup)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

#define PQ_REFERENCED PQ_PRIVATE1
#define PQ_HOT PQ_PRIVATE2
#define PQ_TEST PQ_PRIVATE3
#define PQ_INITIALREF PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define PQ_QMASK (PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define PQ_QFACTOR PQ_PRIVATE5
#define PQ_SPECULATIVE PQ_PRIVATE8

#define CLOCKPRO_NOQUEUE 0
#define CLOCKPRO_NEWQ 1 /* small queue to clear initial ref. */
#if defined(LISTQ)
#define CLOCKPRO_COLDQ 2
#define CLOCKPRO_HOTQ 3
#else /* defined(LISTQ) */
#define CLOCKPRO_COLDQ (2 + coldqidx) /* XXX */
#define CLOCKPRO_HOTQ (3 - coldqidx) /* XXX */
#endif /* defined(LISTQ) */
#define CLOCKPRO_LISTQ 4
#define CLOCKPRO_NQUEUE 4

static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}
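
/*
 * example of the queue-index encoding above (illustrative values only;
 * the real PQ_PRIVATE* bits come from uvm): if PQ_PRIVATE5 were 0x0100,
 * then PQ_QFACTOR == 0x0100 and PQ_QMASK == 0x0700, and a page on
 * CLOCKPRO_HOTQ (qidx 3) would carry 0x0300 in pg->pqflags;
 * clockpro_getq() recovers 3 by dividing the masked bits by PQ_QFACTOR.
 */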

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define NONRES_COOKIE_INVAL 0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}
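
/*
 * note on the two hashes above: pageidentityhash1() only selects the
 * bucket, while pageidentityhash2() provides the 32-bit cookie that is
 * actually stored.  a non-resident page is therefore remembered by a
 * hash alone, so nonresident_lookupremove() can occasionally report a
 * false match when two pages yield the same cookie in the same bucket;
 * the nresconflict event counter counts such collisions noticed at
 * record time when DEBUG is enabled.
 */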

#define BUCKETSIZE 14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket *buckets;
static size_t hashsize;

static int coldadj;
#define COLDTARGET_ADJ(d) coldadj += (d)
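
/*
 * as used in this file, coldadj drifts the cold target up when a page
 * recorded in the non-resident hash is re-faulted soon enough to still
 * be found (clockpro_pageenqueue(), and the PQ_REFERENCED|PQ_TEST case
 * in handcold_advance()), and down when a test period expires or a
 * cookie is overwritten (nonresident_rotate(), nonresident_pagerecord(),
 * handhot_endtest()).  it only takes effect in clockpro_tune() when
 * ADAPTIVE is defined; otherwise the sysctl'able percentage is used.
 */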

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}
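
/*
 * sizing example (illustrative numbers, not measured): with n == 262144
 * resident pages (1GB of 4KB pages), BUCKETSIZE == 14 and the default
 * clockpro_hashfactor of 2, sz = howmany(262144, 14) * 2 = 18725 * 2 =
 * 37450 buckets, i.e. room for up to 524300 cookies, or roughly twice
 * the resident page count's worth of non-resident history.
 */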

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;
	static struct bucket static_bucket;

	if (hashsize == 0) {
		return &static_bucket;
	}

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}

static boolean_t
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return TRUE;
		}
	}
	return FALSE;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX the object pointer is often freed and reused for an
	 * unrelated object.  for vnodes, it would be better to use
	 * something like a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static boolean_t
nonresident_pagelookupremove(struct vm_page *pg)
{
	boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	PDPOL_EVCNT_INCR(nreslookup);
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nresrecordobj);
	} else {
		PDPOL_EVCNT_INCR(nresrecordanon);
	}
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define check_sanity() /* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}
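
/*
 * worked example of the non-ADAPTIVE tuning above (illustrative numbers):
 * with s_npages == 262144 and the default CLOCKPRO_COLDPCT of 10,
 * coldtarget becomes roughly 26214 pages and s_newqlenmax a quarter of
 * that, ~6553; on a machine with 4KB pages CLOCKPRO_NEWQMIN is
 * (1MB >> 12) == 256, so the floor only matters when the cold target
 * drops below about 1024 pages.
 */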

static void
clockpro_movereferencebit(struct vm_page *pg)
{
	boolean_t referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}

static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	boolean_t hot;
	boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = FALSE;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider an mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page, which sets its reference bit.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */
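
	/*
	 * how the flags arrange that, as far as this file goes: the
	 * read-ahead enqueue arrives with PQ_SPECULATIVE, the fault-time
	 * activation replaces it with PQ_INITIALREF, and when the page is
	 * later rotated off the new queue clockpro___newqrotate() clears
	 * both PQ_INITIALREF and the reference bits, so only references
	 * made after that point are seen by the hands.
	 */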

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}
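
	/*
	 * for illustration (assumed numbers): with hotqlen == 7000 and
	 * BUCKETSIZE == 14, each pass through here advances cycle_target
	 * by 14/7000, i.e. by 1 every 500 hot-hand steps, so one full
	 * revolution of the hot hand advances cycle_target by BUCKETSIZE
	 * and lets nonresident_rotate() sweep each bucket's hand once
	 * around, expiring cookies recorded one revolution ago.
	 */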

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we've met our target, stop at a hot page so that no cold
	 * page in its test period has a larger recency than any hot page.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~PQ_SPECULATIVE;
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

boolean_t
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}

static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

boolean_t
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return TRUE;
	}
	return FALSE;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h> /* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage of the entire queue used as the cold target"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define INITCOUNT() \
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define COUNT(pg) \
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define PRINTCOUNT(name) \
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF("  %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */
