uvm_pdpolicy_clockpro.c revision 1.11 1 /* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.11 2008/01/13 16:28:41 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * CLOCK-Pro replacement policy:
31 * http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
32 *
33 * approximation of the list of non-resident pages using hash:
34 * http://linux-mm.org/ClockProApproximation
35 */
36
37 /* #define CLOCKPRO_DEBUG */
38
39 #if defined(PDSIM)
40
41 #include "pdsim.h"
42
43 #else /* defined(PDSIM) */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.11 2008/01/13 16:28:41 yamt Exp $");
47
48 #include "opt_ddb.h"
49
50 #include <sys/param.h>
51 #include <sys/proc.h>
52 #include <sys/systm.h>
53 #include <sys/kernel.h>
54 #include <sys/hash.h>
55
56 #include <uvm/uvm.h>
57 #include <uvm/uvm_pdpolicy.h>
58 #include <uvm/uvm_pdpolicy_impl.h>
59
/*
 * diagnostic helpers for the non-PDSIM build.
 *
 * DPRINTF() expands to nothing; WARN() forwards its arguments to printf().
 * the first branch uses C99 variadic macros, the second the GCC
 * named-variadic form for compilers that do not announce C99.
 */
#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

/* dump() is compiled out in this build. */
#define	dump(a)		/* nothing */

/*
 * build-time feature selection for this build: of the three switches
 * tested throughout the file, only LISTQ is enabled here.
 */
#undef	USEONCE2
#define	LISTQ
#undef	ADAPTIVE
73
74 #endif /* defined(PDSIM) */
75
/*
 * initial value of the cold-target percentage parameter
 * (passed to uvm_pctparam_init() in clockpro_init()).
 * may be overridden at build time.
 */
#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

/*
 * upper bound, as a percentage of s_npages, for the cold target.
 * only consulted by clockpro_tune() when ADAPTIVE is defined.
 */
#define	CLOCKPRO_COLDPCTMAX	90

/*
 * multiplier applied to the bucket count in clockpro_hashinit().
 * may be overridden at build time.
 */
#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

/*
 * lower bound for s_newqlenmax (see clockpro_tune()):
 * the number of pages that make up 1MB.
 */
#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

/* run-time copy of CLOCKPRO_HASHFACTOR; read by clockpro_hashinit(). */
int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;
89
/*
 * event counters.  PDPOL_EVCNT_DEFINE is not defined in this file
 * (presumably it comes from uvm_pdpolicy_impl.h or pdsim.h -- confirm).
 */

/* non-resident page hash: record / lookup / hit / eviction events */
PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookupobj)
PDPOL_EVCNT_DEFINE(nreslookupanon)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)	/* only counted when DEBUG is defined */
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

/* events counted by handhot_advance() and handhot_endtest() */
PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

/* events counted by handcold_advance() */
PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
/* NOTE(review): no increment of the next counter is visible in this file */
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

/* handling of pages enqueued with PQ_SPECULATIVE */
PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)
119
/*
 * policy-private bits in pg->pqflags.
 *
 * PQ_REFERENCED   software copy of the reference bit; set by
 *                 clockpro_movereferencebit() and uvmpdpol_pageactivate().
 * PQ_HOT          page is hot; queued pages without it are counted
 *                 in s_ncold.
 * PQ_TEST         cold page is in its test period.
 * PQ_INITIALREF   set when a page is enqueued via uvmpdpol_pageactivate();
 *                 cleared, along with the reference bit, when the page
 *                 leaves the new queue.
 * PQ_SPECULATIVE  set when a page is enqueued via uvmpdpol_pageenqueue().
 */
#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
/*
 * PQ_PRIVATE5..7 hold the queue index as a small integer, so they must be
 * three consecutive bits.
 */
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)	/* queue index field */
#define	PQ_QFACTOR	PQ_PRIVATE5	/* value of the field's lowest bit */
#define	PQ_SPECULATIVE	PQ_PRIVATE8
130
/*
 * queue indices as stored in the PQ_QMASK field of pg->pqflags.
 * index 0 means "not on any queue"; queue N lives in s_q[N - 1].
 */
#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
/* without LISTQ the two indices trade places whenever coldqidx flips */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4	/* number of queues == largest valid index */
142
143 static inline void
144 clockpro_setq(struct vm_page *pg, int qidx)
145 {
146 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
147 KASSERT(qidx <= CLOCKPRO_NQUEUE);
148
149 pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
150 }
151
152 static inline int
153 clockpro_getq(struct vm_page *pg)
154 {
155 int qidx;
156
157 qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
158 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
159 KASSERT(qidx <= CLOCKPRO_NQUEUE);
160 return qidx;
161 }
162
/*
 * a page queue: a tail queue of pages plus a running element count.
 */
typedef struct {
	struct pglist q_q;	/* the pages, linked through their pageq entry */
	int q_len;		/* number of pages currently on q_q */
} pageq_t;
167
/*
 * global state of the policy.
 */
struct clockpro_state {
	int s_npages;		/* pages currently enqueued (all queues) */
	int s_coldtarget;	/* desired number of cold pages */
	int s_ncold;		/* enqueued pages without PQ_HOT */

	int s_newqlenmax;	/* length the new queue is trimmed to */
	pageq_t s_q[CLOCKPRO_NQUEUE];	/* queue with index N is s_q[N - 1] */

	/* percentage of s_npages used as cold target when !ADAPTIVE */
	struct uvm_pctparam s_coldtargetpct;
};
178
179 static pageq_t *
180 clockpro_queue(struct clockpro_state *s, int qidx)
181 {
182
183 KASSERT(CLOCKPRO_NOQUEUE < qidx);
184 KASSERT(qidx <= CLOCKPRO_NQUEUE);
185
186 return &s->s_q[qidx - 1];
187 }
188
#if !defined(LISTQ)

/* 0 or 1; selects which physical queue plays the cold / hot role. */
static int coldqidx;

/*
 * clockpro_switchqueue: exchange the roles of the cold and hot queues
 * by toggling coldqidx between 0 and 1.
 */
static void
clockpro_switchqueue(void)
{
	coldqidx ^= 1;
}

#endif /* !defined(LISTQ) */
201
/* the single instance of the policy state */
static struct clockpro_state clockpro;
/* per-scan bookkeeping; reset by uvmpdpol_scaninit() */
static struct clockpro_scanstate {
	int ss_nscanned;	/* calls to uvmpdpol_selectvictim() that ran the cold hand */
} scanstate;
206
207 /* ---------------------------------------- */
208
209 static void
210 pageq_init(pageq_t *q)
211 {
212
213 TAILQ_INIT(&q->q_q);
214 q->q_len = 0;
215 }
216
/*
 * pageq_len: return the number of pages recorded as being on "q".
 */
static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}
223
/*
 * pageq_first: return the page at the head of "q" without removing it,
 * or NULL if the queue is empty.
 */
static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}
230
231 static void
232 pageq_insert_tail(pageq_t *q, struct vm_page *pg)
233 {
234
235 TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
236 q->q_len++;
237 }
238
239 static void
240 pageq_insert_head(pageq_t *q, struct vm_page *pg)
241 {
242
243 TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
244 q->q_len++;
245 }
246
247 static void
248 pageq_remove(pageq_t *q, struct vm_page *pg)
249 {
250
251 #if 1
252 KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
253 #endif
254 KASSERT(q->q_len > 0);
255 TAILQ_REMOVE(&q->q_q, pg, pageq);
256 q->q_len--;
257 }
258
259 static struct vm_page *
260 pageq_remove_head(pageq_t *q)
261 {
262 struct vm_page *pg;
263
264 pg = TAILQ_FIRST(&q->q_q);
265 if (pg == NULL) {
266 KASSERT(q->q_len == 0);
267 return NULL;
268 }
269 pageq_remove(q, pg);
270 return pg;
271 }
272
273 /* ---------------------------------------- */
274
275 static void
276 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
277 {
278 pageq_t *q = clockpro_queue(s, qidx);
279
280 clockpro_setq(pg, qidx);
281 pageq_insert_tail(q, pg);
282 }
283
284 static void
285 clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
286 {
287 pageq_t *q = clockpro_queue(s, qidx);
288
289 clockpro_setq(pg, qidx);
290 pageq_insert_head(q, pg);
291 }
292
293 /* ---------------------------------------- */
294
/*
 * what is remembered about a non-resident page: a 32-bit hash of its
 * identity.  NONRES_COOKIE_INVAL marks an empty slot, so calccookie()
 * never returns it.
 */
typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

/* identity of the owner of a page: the object or anon pointer as an integer */
typedef uintptr_t objid_t;
299
300 /*
301 * XXX maybe these hash functions need reconsideration,
302 * given that hash distribution is critical here.
303 */
304
305 static uint32_t
306 pageidentityhash1(objid_t obj, off_t idx)
307 {
308 uint32_t hash = HASH32_BUF_INIT;
309
310 #if 1
311 hash = hash32_buf(&idx, sizeof(idx), hash);
312 hash = hash32_buf(&obj, sizeof(obj), hash);
313 #else
314 hash = hash32_buf(&obj, sizeof(obj), hash);
315 hash = hash32_buf(&idx, sizeof(idx), hash);
316 #endif
317 return hash;
318 }
319
320 static uint32_t
321 pageidentityhash2(objid_t obj, off_t idx)
322 {
323 uint32_t hash = HASH32_BUF_INIT;
324
325 hash = hash32_buf(&obj, sizeof(obj), hash);
326 hash = hash32_buf(&idx, sizeof(idx), hash);
327 return hash;
328 }
329
330 static nonres_cookie_t
331 calccookie(objid_t obj, off_t idx)
332 {
333 uint32_t hash = pageidentityhash2(obj, idx);
334 nonres_cookie_t cookie = hash;
335
336 if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
337 cookie++; /* XXX */
338 }
339 return cookie;
340 }
341
/* number of cookies per hash bucket */
#define	BUCKETSIZE	14
/*
 * one bucket of the non-resident page hash: a small ring of cookies.
 */
struct bucket {
	int cycle;	/* value of cycle_target this bucket was last rotated to */
	int cur;	/* ring position: next slot to be cleared / overwritten */
	nonres_cookie_t pages[BUCKETSIZE];	/* cookies; NONRES_COOKIE_INVAL == empty */
};
/* global rotation count; advanced by handhot_advance() */
static int cycle_target;
/* remainder carried between handhot_advance() steps, in units of 1/hotqlen */
static int cycle_target_frac;

/* a single bucket used until clockpro_hashinit() installs a real table */
static struct bucket static_bucket;
static struct bucket *buckets = &static_bucket;
static size_t hashsize = 1;	/* number of entries in buckets[] */

/*
 * accumulated cold target adjustment.  it is only applied by
 * clockpro_tune() when ADAPTIVE is defined.
 */
static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)
357
358 #if defined(PDSIM)
359
360 static void *
361 clockpro_hashalloc(int n)
362 {
363 size_t allocsz = sizeof(*buckets) * n;
364
365 return malloc(allocsz);
366 }
367
/*
 * clockpro_hashfree: (PDSIM) release a table obtained from
 * clockpro_hashalloc().  "n" is accepted for symmetry with the kernel
 * variant and is not used.
 */
static void
clockpro_hashfree(void *p, int n)
{
	free(p);
}
374
375 #else /* defined(PDSIM) */
376
377 static void *
378 clockpro_hashalloc(int n)
379 {
380 size_t allocsz = round_page(sizeof(*buckets) * n);
381
382 return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
383 }
384
385 static void
386 clockpro_hashfree(void *p, int n)
387 {
388 size_t allocsz = round_page(sizeof(*buckets) * n);
389
390 uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
391 }
392
393 #endif /* defined(PDSIM) */
394
395 static void
396 clockpro_hashinit(uint64_t n)
397 {
398 struct bucket *newbuckets;
399 struct bucket *oldbuckets;
400 size_t sz;
401 size_t oldsz;
402 int i;
403
404 sz = howmany(n, BUCKETSIZE);
405 sz *= clockpro_hashfactor;
406 newbuckets = clockpro_hashalloc(sz);
407 if (newbuckets == NULL) {
408 panic("%s: allocation failure", __func__);
409 }
410 for (i = 0; i < sz; i++) {
411 struct bucket *b = &newbuckets[i];
412 int j;
413
414 b->cycle = cycle_target;
415 b->cur = 0;
416 for (j = 0; j < BUCKETSIZE; j++) {
417 b->pages[j] = NONRES_COOKIE_INVAL;
418 }
419 }
420 /* XXX lock */
421 oldbuckets = buckets;
422 oldsz = hashsize;
423 buckets = newbuckets;
424 hashsize = sz;
425 /* XXX unlock */
426 if (oldbuckets != &static_bucket) {
427 clockpro_hashfree(oldbuckets, oldsz);
428 }
429 }
430
431 static struct bucket *
432 nonresident_getbucket(objid_t obj, off_t idx)
433 {
434 uint32_t hash;
435
436 hash = pageidentityhash1(obj, idx);
437 return &buckets[hash % hashsize];
438 }
439
440 static void
441 nonresident_rotate(struct bucket *b)
442 {
443 int cycle;
444 int cur;
445
446 cycle = b->cycle;
447 cur = b->cur;
448 while (cycle - cycle_target < 0) {
449 if (b->pages[cur] != NONRES_COOKIE_INVAL) {
450 PDPOL_EVCNT_INCR(nreshandhot);
451 COLDTARGET_ADJ(-1);
452 }
453 b->pages[cur] = NONRES_COOKIE_INVAL;
454 cur++;
455 if (cur == BUCKETSIZE) {
456 cur = 0;
457 }
458 cycle++;
459 }
460 b->cycle = cycle;
461 b->cur = cur;
462 }
463
464 static bool
465 nonresident_lookupremove(objid_t obj, off_t idx)
466 {
467 struct bucket *b = nonresident_getbucket(obj, idx);
468 nonres_cookie_t cookie = calccookie(obj, idx);
469 int i;
470
471 nonresident_rotate(b);
472 for (i = 0; i < BUCKETSIZE; i++) {
473 if (b->pages[i] == cookie) {
474 b->pages[i] = NONRES_COOKIE_INVAL;
475 return true;
476 }
477 }
478 return false;
479 }
480
481 static objid_t
482 pageobj(struct vm_page *pg)
483 {
484 const void *obj;
485
486 /*
487 * XXX object pointer is often freed and reused for unrelated object.
488 * for vnodes, it would be better to use something like
489 * a hash of fsid/fileid/generation.
490 */
491
492 obj = pg->uobject;
493 if (obj == NULL) {
494 obj = pg->uanon;
495 KASSERT(obj != NULL);
496 KASSERT(pg->offset == 0);
497 }
498
499 return (objid_t)obj;
500 }
501
502 static off_t
503 pageidx(struct vm_page *pg)
504 {
505
506 KASSERT((pg->offset & PAGE_MASK) == 0);
507 return pg->offset >> PAGE_SHIFT;
508 }
509
510 static bool
511 nonresident_pagelookupremove(struct vm_page *pg)
512 {
513 bool found = nonresident_lookupremove(pageobj(pg), pageidx(pg));
514
515 if (pg->uobject) {
516 PDPOL_EVCNT_INCR(nreslookupobj);
517 } else {
518 PDPOL_EVCNT_INCR(nreslookupanon);
519 }
520 if (found) {
521 if (pg->uobject) {
522 PDPOL_EVCNT_INCR(nresfoundobj);
523 } else {
524 PDPOL_EVCNT_INCR(nresfoundanon);
525 }
526 }
527 return found;
528 }
529
530 static void
531 nonresident_pagerecord(struct vm_page *pg)
532 {
533 objid_t obj = pageobj(pg);
534 off_t idx = pageidx(pg);
535 struct bucket *b = nonresident_getbucket(obj, idx);
536 nonres_cookie_t cookie = calccookie(obj, idx);
537
538 #if defined(DEBUG)
539 int i;
540
541 for (i = 0; i < BUCKETSIZE; i++) {
542 if (b->pages[i] == cookie) {
543 PDPOL_EVCNT_INCR(nresconflict);
544 }
545 }
546 #endif /* defined(DEBUG) */
547
548 if (pg->uobject) {
549 PDPOL_EVCNT_INCR(nresrecordobj);
550 } else {
551 PDPOL_EVCNT_INCR(nresrecordanon);
552 }
553 nonresident_rotate(b);
554 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
555 PDPOL_EVCNT_INCR(nresoverwritten);
556 COLDTARGET_ADJ(-1);
557 }
558 b->pages[b->cur] = cookie;
559 b->cur = (b->cur + 1) % BUCKETSIZE;
560 }
561
562 /* ---------------------------------------- */
563
/*
 * check_sanity: hook for consistency checks, called around queue
 * manipulations.  it currently checks nothing: with CLOCKPRO_DEBUG it is
 * an empty function, otherwise it is an empty macro.
 */
#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */
572
573 static void
574 clockpro_reinit(void)
575 {
576
577 clockpro_hashinit(uvmexp.npages);
578 }
579
580 static void
581 clockpro_init(void)
582 {
583 struct clockpro_state *s = &clockpro;
584 int i;
585
586 for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
587 pageq_init(&s->s_q[i]);
588 }
589 s->s_newqlenmax = 1;
590 s->s_coldtarget = 1;
591 uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
592 }
593
594 static void
595 clockpro_tune(void)
596 {
597 struct clockpro_state *s = &clockpro;
598 int coldtarget;
599
600 #if defined(ADAPTIVE)
601 int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
602 int coldmin = 1;
603
604 coldtarget = s->s_coldtarget;
605 if (coldtarget + coldadj < coldmin) {
606 coldadj = coldmin - coldtarget;
607 } else if (coldtarget + coldadj > coldmax) {
608 coldadj = coldmax - coldtarget;
609 }
610 coldtarget += coldadj;
611 #else /* defined(ADAPTIVE) */
612 coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
613 if (coldtarget < 1) {
614 coldtarget = 1;
615 }
616 #endif /* defined(ADAPTIVE) */
617
618 s->s_coldtarget = coldtarget;
619 s->s_newqlenmax = coldtarget / 4;
620 if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
621 s->s_newqlenmax = CLOCKPRO_NEWQMIN;
622 }
623 }
624
625 static void
626 clockpro_movereferencebit(struct vm_page *pg)
627 {
628 bool referenced;
629
630 referenced = pmap_clear_reference(pg);
631 if (referenced) {
632 pg->pqflags |= PQ_REFERENCED;
633 }
634 }
635
636 static void
637 clockpro_clearreferencebit(struct vm_page *pg)
638 {
639
640 clockpro_movereferencebit(pg);
641 pg->pqflags &= ~PQ_REFERENCED;
642 }
643
644 static void
645 clockpro___newqrotate(int len)
646 {
647 struct clockpro_state * const s = &clockpro;
648 pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
649 struct vm_page *pg;
650
651 while (pageq_len(newq) > len) {
652 pg = pageq_remove_head(newq);
653 KASSERT(pg != NULL);
654 KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
655 if ((pg->pqflags & PQ_INITIALREF) != 0) {
656 clockpro_clearreferencebit(pg);
657 pg->pqflags &= ~PQ_INITIALREF;
658 }
659 /* place at the list head */
660 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
661 }
662 }
663
/*
 * clockpro_newqrotate: trim the new queue down to its configured
 * maximum length (s_newqlenmax), moving the excess to the cold queue.
 */
static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}
673
/*
 * clockpro_newqflush: trim the new queue down to at most "n" pages,
 * moving the excess to the cold queue.
 */
static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}
682
683 static void
684 clockpro_newqflushone(void)
685 {
686 struct clockpro_state * const s = &clockpro;
687
688 clockpro_newqflush(
689 MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
690 }
691
692 /*
693 * our "tail" is called "list-head" in the paper.
694 */
695
696 static void
697 clockpro___enqueuetail(struct vm_page *pg)
698 {
699 struct clockpro_state * const s = &clockpro;
700
701 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
702
703 check_sanity();
704 #if !defined(USEONCE2)
705 clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
706 clockpro_newqrotate();
707 #else /* !defined(USEONCE2) */
708 #if defined(LISTQ)
709 KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
710 #endif /* defined(LISTQ) */
711 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
712 #endif /* !defined(USEONCE2) */
713 check_sanity();
714 }
715
716 static void
717 clockpro_pageenqueue(struct vm_page *pg)
718 {
719 struct clockpro_state * const s = &clockpro;
720 bool hot;
721 bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */
722
723 KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
724 KASSERT(mutex_owned(&uvm_pageqlock));
725 check_sanity();
726 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
727 s->s_npages++;
728 pg->pqflags &= ~(PQ_HOT|PQ_TEST);
729 if (speculative) {
730 hot = false;
731 PDPOL_EVCNT_INCR(speculativeenqueue);
732 } else {
733 hot = nonresident_pagelookupremove(pg);
734 if (hot) {
735 COLDTARGET_ADJ(1);
736 }
737 }
738
739 /*
740 * consider mmap'ed file:
741 *
742 * - read-ahead enqueues a page.
743 *
744 * - on the following read-ahead hit, the fault handler activates it.
745 *
746 * - finally, the userland code which caused the above fault
747 * actually accesses the page. it makes its reference bit set.
748 *
749 * we want to count the above as a single access, rather than
750 * three accesses with short reuse distances.
751 */
752
753 #if defined(USEONCE2)
754 pg->pqflags &= ~PQ_INITIALREF;
755 if (hot) {
756 pg->pqflags |= PQ_TEST;
757 }
758 s->s_ncold++;
759 clockpro_clearreferencebit(pg);
760 clockpro___enqueuetail(pg);
761 #else /* defined(USEONCE2) */
762 if (speculative) {
763 s->s_ncold++;
764 } else if (hot) {
765 pg->pqflags |= PQ_HOT;
766 } else {
767 pg->pqflags |= PQ_TEST;
768 s->s_ncold++;
769 }
770 clockpro___enqueuetail(pg);
771 #endif /* defined(USEONCE2) */
772 KASSERT(s->s_ncold <= s->s_npages);
773 }
774
775 static pageq_t *
776 clockpro_pagequeue(struct vm_page *pg)
777 {
778 struct clockpro_state * const s = &clockpro;
779 int qidx;
780
781 qidx = clockpro_getq(pg);
782 KASSERT(qidx != CLOCKPRO_NOQUEUE);
783
784 return clockpro_queue(s, qidx);
785 }
786
787 static void
788 clockpro_pagedequeue(struct vm_page *pg)
789 {
790 struct clockpro_state * const s = &clockpro;
791 pageq_t *q;
792
793 KASSERT(s->s_npages > 0);
794 check_sanity();
795 q = clockpro_pagequeue(pg);
796 pageq_remove(q, pg);
797 check_sanity();
798 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
799 if ((pg->pqflags & PQ_HOT) == 0) {
800 KASSERT(s->s_ncold > 0);
801 s->s_ncold--;
802 }
803 KASSERT(s->s_npages > 0);
804 s->s_npages--;
805 check_sanity();
806 }
807
808 static void
809 clockpro_pagerequeue(struct vm_page *pg)
810 {
811 struct clockpro_state * const s = &clockpro;
812 int qidx;
813
814 qidx = clockpro_getq(pg);
815 KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
816 pageq_remove(clockpro_queue(s, qidx), pg);
817 check_sanity();
818 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
819
820 clockpro___enqueuetail(pg);
821 }
822
823 static void
824 handhot_endtest(struct vm_page *pg)
825 {
826
827 KASSERT((pg->pqflags & PQ_HOT) == 0);
828 if ((pg->pqflags & PQ_TEST) != 0) {
829 PDPOL_EVCNT_INCR(hhotcoldtest);
830 COLDTARGET_ADJ(-1);
831 pg->pqflags &= ~PQ_TEST;
832 } else {
833 PDPOL_EVCNT_INCR(hhotcold);
834 }
835 }
836
/*
 * handhot_advance: run the hot hand.
 *
 * after re-tuning the targets, and as long as there are fewer cold
 * pages than s_coldtarget, the page at the head of the hot queue is
 * examined: an unreferenced hot page loses PQ_HOT and is counted as
 * cold, a referenced one stays hot.  in both cases PQ_REFERENCED is
 * cleared and the page is requeued with clockpro_pagerequeue().
 *
 * every step also advances cycle_target, which ages the entries of the
 * non-resident hash (see nonresident_rotate()).
 *
 * if the hot queue is empty ("takeover"), with LISTQ pages are pulled
 * from the cold queue: cold ones have their test period ended and are
 * parked on the list queue, and the first hot one is put on the hot
 * queue.  without LISTQ the hot and cold queues exchange roles.
 */
static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	/* nothing to do if there are enough cold pages already */
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				/* try to refill the cold queue from the new queue */
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				/* cold page: end its test, park it on the list queue */
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				/* found a hot page: it becomes the hot queue head */
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	/*
	 * each step adds BUCKETSIZE / hotqlen to cycle_target;
	 * cycle_target_frac carries the remainder.
	 */
	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we met our target,
	 * stop at a hot page so that no cold pages in test period
	 * have larger recency than any hot pages.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		/* not referenced since the last visit: demote to cold */
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}
943
/*
 * handcold_advance: run the cold hand until it finds a page to reclaim.
 *
 * returns the chosen page, or NULL if no page could be found.
 * the returned page has been requeued and is still on a queue.
 *
 * each iteration trims the new queue, runs the hot hand, and then
 * examines one page: the head of the list queue if there is one
 * (LISTQ only), otherwise the head of the cold queue.
 *
 * - a hot page found on the cold queue is moved to the hot queue.
 * - a speculative page that was referenced is re-enqueued as an
 *   ordinary page.
 * - an unreferenced cold page is the victim; if it was in its test
 *   period it is first recorded in the non-resident hash.
 * - a referenced cold page in its test period becomes hot.
 * - a referenced cold page not in its test period starts one.
 */
static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
#if defined(LISTQ)
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
#endif /* defined(LISTQ) */
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		/* pages parked on the list queue are considered first */
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			/* try to refill the cold queue from the new queue */
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			/* hot page on the cold queue: hand it to the hot queue */
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				/* referenced: re-enqueue it as an ordinary page */
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			/* unreferenced, in test: remember it as non-resident */
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			/* unreferenced cold page: this is the victim */
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			/* referenced during its test period: promote to hot */
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			/* referenced, not in test: start a test period */
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}
1061
1062 void
1063 uvmpdpol_pageactivate(struct vm_page *pg)
1064 {
1065
1066 if (!uvmpdpol_pageisqueued_p(pg)) {
1067 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
1068 pg->pqflags |= PQ_INITIALREF;
1069 clockpro_pageenqueue(pg);
1070 } else if ((pg->pqflags & PQ_SPECULATIVE)) {
1071 PDPOL_EVCNT_INCR(speculativehit1);
1072 pg->pqflags &= ~PQ_SPECULATIVE;
1073 pg->pqflags |= PQ_INITIALREF;
1074 clockpro_pagedequeue(pg);
1075 clockpro_pageenqueue(pg);
1076 }
1077 pg->pqflags |= PQ_REFERENCED;
1078 }
1079
/*
 * uvmpdpol_pagedeactivate: forget the software reference flag of "pg".
 * the page is not moved between queues.
 */
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}
1086
1087 void
1088 uvmpdpol_pagedequeue(struct vm_page *pg)
1089 {
1090
1091 if (!uvmpdpol_pageisqueued_p(pg)) {
1092 return;
1093 }
1094 clockpro_pagedequeue(pg);
1095 pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
1096 }
1097
1098 void
1099 uvmpdpol_pageenqueue(struct vm_page *pg)
1100 {
1101
1102 #if 1
1103 if (uvmpdpol_pageisqueued_p(pg)) {
1104 return;
1105 }
1106 clockpro_clearreferencebit(pg);
1107 pg->pqflags |= PQ_SPECULATIVE;
1108 clockpro_pageenqueue(pg);
1109 #else
1110 uvmpdpol_pageactivate(pg);
1111 #endif
1112 }
1113
1114 void
1115 uvmpdpol_anfree(struct vm_anon *an)
1116 {
1117
1118 KASSERT(an->an_page == NULL);
1119 if (nonresident_lookupremove((objid_t)an, 0)) {
1120 PDPOL_EVCNT_INCR(nresanonfree);
1121 }
1122 }
1123
/*
 * uvmpdpol_init: initialize the policy.  see clockpro_init().
 */
void
uvmpdpol_init(void)
{

	clockpro_init();
}
1130
/*
 * uvmpdpol_reinit: resize the non-resident page hash for the current
 * number of pages.  see clockpro_reinit().
 */
void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}
1137
1138 void
1139 uvmpdpol_estimatepageable(int *active, int *inactive)
1140 {
1141 struct clockpro_state * const s = &clockpro;
1142
1143 if (active) {
1144 *active = s->s_npages - s->s_ncold;
1145 }
1146 if (inactive) {
1147 *inactive = s->s_ncold;
1148 }
1149 }
1150
1151 bool
1152 uvmpdpol_pageisqueued_p(struct vm_page *pg)
1153 {
1154
1155 return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
1156 }
1157
1158 void
1159 uvmpdpol_scaninit(void)
1160 {
1161 struct clockpro_scanstate * const ss = &scanstate;
1162
1163 ss->ss_nscanned = 0;
1164 }
1165
1166 struct vm_page *
1167 uvmpdpol_selectvictim(void)
1168 {
1169 struct clockpro_state * const s = &clockpro;
1170 struct clockpro_scanstate * const ss = &scanstate;
1171 struct vm_page *pg;
1172
1173 if (ss->ss_nscanned > s->s_npages) {
1174 DPRINTF("scan too much\n");
1175 return NULL;
1176 }
1177 pg = handcold_advance();
1178 ss->ss_nscanned++;
1179 return pg;
1180 }
1181
1182 static void
1183 clockpro_dropswap(pageq_t *q, int *todo)
1184 {
1185 struct vm_page *pg;
1186
1187 TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
1188 if (*todo <= 0) {
1189 break;
1190 }
1191 if ((pg->pqflags & PQ_HOT) == 0) {
1192 continue;
1193 }
1194 if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
1195 continue;
1196 }
1197 if (uvmpd_trydropswap(pg)) {
1198 (*todo)--;
1199 }
1200 }
1201 }
1202
1203 void
1204 uvmpdpol_balancequeue(int swap_shortage)
1205 {
1206 struct clockpro_state * const s = &clockpro;
1207 int todo = swap_shortage;
1208
1209 if (todo == 0) {
1210 return;
1211 }
1212
1213 /*
1214 * reclaim swap slots from hot pages
1215 */
1216
1217 DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);
1218
1219 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
1220 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
1221 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
1222
1223 DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
1224 }
1225
1226 bool
1227 uvmpdpol_needsscan_p(void)
1228 {
1229 struct clockpro_state * const s = &clockpro;
1230
1231 if (s->s_ncold < s->s_coldtarget) {
1232 return true;
1233 }
1234 return false;
1235 }
1236
/*
 * uvmpdpol_tune: recompute the policy's targets.  see clockpro_tune().
 */
void
uvmpdpol_tune(void)
{

	clockpro_tune();
}
1243
1244 #if !defined(PDSIM)
1245
1246 #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
1247
1248 void
1249 uvmpdpol_sysctlsetup(void)
1250 {
1251 #if !defined(ADAPTIVE)
1252 struct clockpro_state * const s = &clockpro;
1253
1254 uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
1255 SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
1256 #endif /* !defined(ADAPTIVE) */
1257 }
1258
1259 #endif /* !defined(PDSIM) */
1260
1261 #if defined(DDB)
1262
1263 void clockpro_dump(void);
1264
1265 void
1266 clockpro_dump(void)
1267 {
1268 struct clockpro_state * const s = &clockpro;
1269
1270 struct vm_page *pg;
1271 int ncold, nhot, ntest, nspeculative, ninitialref, nref;
1272 int newqlen, coldqlen, hotqlen, listqlen;
1273
1274 newqlen = coldqlen = hotqlen = listqlen = 0;
1275 printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
1276 s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);
1277
1278 #define INITCOUNT() \
1279 ncold = nhot = ntest = nspeculative = ninitialref = nref = 0
1280
1281 #define COUNT(pg) \
1282 if ((pg->pqflags & PQ_HOT) != 0) { \
1283 nhot++; \
1284 } else { \
1285 ncold++; \
1286 if ((pg->pqflags & PQ_TEST) != 0) { \
1287 ntest++; \
1288 } \
1289 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
1290 nspeculative++; \
1291 } \
1292 if ((pg->pqflags & PQ_INITIALREF) != 0) { \
1293 ninitialref++; \
1294 } else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
1295 pmap_is_referenced(pg)) { \
1296 nref++; \
1297 } \
1298 }
1299
1300 #define PRINTCOUNT(name) \
1301 printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
1302 "nref=%d\n", \
1303 (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)
1304
1305 INITCOUNT();
1306 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
1307 if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
1308 printf("newq corrupt %p\n", pg);
1309 }
1310 COUNT(pg)
1311 newqlen++;
1312 }
1313 PRINTCOUNT("newq");
1314
1315 INITCOUNT();
1316 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
1317 if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
1318 printf("coldq corrupt %p\n", pg);
1319 }
1320 COUNT(pg)
1321 coldqlen++;
1322 }
1323 PRINTCOUNT("coldq");
1324
1325 INITCOUNT();
1326 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
1327 if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
1328 printf("hotq corrupt %p\n", pg);
1329 }
1330 #if defined(LISTQ)
1331 if ((pg->pqflags & PQ_HOT) == 0) {
1332 printf("cold page in hotq: %p\n", pg);
1333 }
1334 #endif /* defined(LISTQ) */
1335 COUNT(pg)
1336 hotqlen++;
1337 }
1338 PRINTCOUNT("hotq");
1339
1340 INITCOUNT();
1341 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
1342 #if !defined(LISTQ)
1343 printf("listq %p\n");
1344 #endif /* !defined(LISTQ) */
1345 if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
1346 printf("listq corrupt %p\n", pg);
1347 }
1348 COUNT(pg)
1349 listqlen++;
1350 }
1351 PRINTCOUNT("listq");
1352
1353 printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
1354 newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
1355 coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
1356 hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
1357 listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
1358 }
1359
1360 #endif /* defined(DDB) */
1361
1362 #if defined(PDSIM)
1363 #if defined(DEBUG)
/*
 * pdsim_dumpq: (PDSIM, DEBUG) print the pages of queue "qidx" from head
 * to tail via DPRINTF.
 *
 * each page is shown as its page index followed by one letter per
 * state: H (PQ_HOT), T (PQ_TEST), R (PQ_REFERENCED), r (referenced
 * according to pmap_is_referenced()), I (PQ_INITIALREF) and
 * S (PQ_SPECULATIVE).
 */
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		);
	}
}
1383 #endif /* defined(DEBUG) */
1384
/*
 * pdsim_dump: (PDSIM) print a one-line snapshot of all queues, tagged
 * with "id": the list, hot, cold and new queues in that order,
 * followed by the cold page count, the cold target and coldadj.
 * does nothing unless DEBUG is defined.
 */
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
1403 #endif /* defined(PDSIM) */
1404