/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.8 2006/03/21 11:05:22 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */

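/*
 * Rough overview of the terms used below (informal; see the paper above
 * for precise definitions):
 *
 * - each page under the policy's control is either "hot" (PQ_HOT) or cold.
 * - a cold page may be in its "test period" (PQ_TEST); a re-reference
 *   during the test period promotes it to hot.
 * - pages evicted while still in their test period are remembered as
 *   "non-resident" pages, approximated here by hashed cookies kept in
 *   small fixed-size buckets rather than by an exact list.
 * - two clock hands do the work: the hot hand (handhot_advance) demotes
 *   unreferenced hot pages, and the cold hand (handcold_advance) selects
 *   eviction candidates from the cold pages.
 */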
/* #define CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.1.2.8 2006/03/21 11:05:22 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define DPRINTF(...) /* nothing */
#define WARN(...) printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define DPRINTF(a...) /* nothing */ /* GCC */
#define WARN(a...) printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define dump(a) /* nothing */

#undef USEONCE2
#define LISTQ
#undef ADAPTIVE

#endif /* defined(PDSIM) */

#if !defined(CLOCKPRO_COLDPCT)
#define CLOCKPRO_COLDPCT 10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define CLOCKPRO_COLDPCTMAX 90

#if !defined(CLOCKPRO_HASHFACTOR)
#define CLOCKPRO_HASHFACTOR 2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define CLOCKPRO_NEWQMIN ((1024 * 1024) >> PAGE_SHIFT) /* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecord)
PDPOL_EVCNT_DEFINE(nreslookup)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

#define PQ_REFERENCED PQ_PRIVATE1
#define PQ_HOT PQ_PRIVATE2
#define PQ_TEST PQ_PRIVATE3
#define PQ_INITIALREF PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define PQ_QMASK (PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define PQ_QFACTOR PQ_PRIVATE5
#define PQ_SPECULATIVE PQ_PRIVATE8

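/*
 * The queue a page currently belongs to is encoded in pg->pqflags using
 * PQ_PRIVATE5..PQ_PRIVATE7 as a small integer field: PQ_QFACTOR is the
 * field's lowest bit and PQ_QMASK covers all three bits (hence the
 * #error check above that the bits are consecutive).  For example, a page
 * on the hot queue has (pqflags & PQ_QMASK) == CLOCKPRO_HOTQ * PQ_QFACTOR.
 * clockpro_setq() and clockpro_getq() below do the encoding and decoding.
 */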
#define CLOCKPRO_NOQUEUE 0
#define CLOCKPRO_NEWQ 1 /* small queue to clear initial ref. */
#if defined(LISTQ)
#define CLOCKPRO_COLDQ 2
#define CLOCKPRO_HOTQ 3
#else /* defined(LISTQ) */
#define CLOCKPRO_COLDQ (2 + coldqidx) /* XXX */
#define CLOCKPRO_HOTQ (3 - coldqidx) /* XXX */
#endif /* defined(LISTQ) */
#define CLOCKPRO_LISTQ 4
#define CLOCKPRO_NQUEUE 4

static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

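/*
 * Global policy state.  s_npages is the number of pages currently on the
 * queues, s_ncold of which are cold; s_coldtarget is the number of cold
 * pages the policy tries to maintain, and s_newqlenmax caps the length
 * of the NEWQ queue.
 */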
struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define NONRES_COOKIE_INVAL 0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

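/*
 * Approximated list of non-resident pages: instead of remembering evicted
 * pages exactly, each (object, offset) pair is hashed to a bucket and a
 * 32-bit cookie is stored there.  A bucket is a small ring of BUCKETSIZE
 * cookies; b->cycle lags behind the global cycle_target, and
 * nonresident_rotate() expires entries as the hot hand advances, so a
 * cookie only survives for roughly one full sweep of the hot hand.
 */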
#define BUCKETSIZE 14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket *buckets;
static size_t hashsize;

static int coldadj;
#define COLDTARGET_ADJ(d) coldadj += (d)

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

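/*
 * (Re)size the non-resident hash for n resident pages: one bucket per
 * BUCKETSIZE pages, scaled by clockpro_hashfactor.  The old table is
 * simply freed, so any recorded non-resident history is lost across a
 * resize.
 */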
static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;
	static struct bucket static_bucket;

	if (hashsize == 0) {
		return &static_bucket;
	}

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

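/*
 * Catch a bucket up with the global cycle_target, invalidating one slot
 * per step.  A slot that still holds a valid cookie here is a
 * non-resident page whose test period expired without a re-reference;
 * count it and shrink the (adaptive) cold target accordingly.
 */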
static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}

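/*
 * Look up a page's cookie in its bucket and remove it.  Returns TRUE if
 * it was found, i.e. the page was evicted recently enough that it is
 * still within its non-resident test period.
 */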
static boolean_t
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return TRUE;
		}
	}
	return FALSE;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX an object pointer is often freed and reused for an unrelated
	 * object.  for vnodes, it would be better to use something like
	 * a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static boolean_t
nonresident_pagelookupremove(struct vm_page *pg)
{
	boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	PDPOL_EVCNT_INCR(nreslookup);
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

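/*
 * Record the identity of a page that is about to be evicted while still
 * in its test period, so that a quick re-fault can be detected later.
 * Having to overwrite a still-valid slot suggests the hash is too small
 * for the current working set; note it and shrink the cold target.
 */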
static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	PDPOL_EVCNT_INCR(nresrecord);
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define check_sanity() /* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}

static void
clockpro_movereferencebit(struct vm_page *pg)
{
	boolean_t referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

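/*
 * NEWQ keeps newly enqueued pages out of the clock for a short while so
 * that the burst of references around page-in (see the comment in
 * clockpro_pageenqueue()) is not mistaken for reuse.  Pages overflowing
 * the given length have their initial reference cleared and are moved to
 * the tail of COLDQ.
 */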
static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}

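/*
 * Bring a page under the policy's control.  In the default (!USEONCE2)
 * configuration: if the page's identity is still in the non-resident
 * hash, it was re-faulted within its test period and enters as a hot
 * page (growing the cold target); otherwise it enters as a cold page
 * with a fresh test period.  Speculatively enqueued pages enter cold
 * with no test period at all.
 */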
static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	boolean_t hot;
	boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = FALSE;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider an mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page, which sets its reference bit.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

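/*
 * The hot hand (HANDhot in the paper).  Runs only while there are fewer
 * cold pages than the target.  It walks HOTQ, demoting hot pages whose
 * reference bit is clear and keeping referenced ones hot, and advances
 * cycle_target so that non-resident entries expire after roughly one
 * full sweep.  With LISTQ, an empty HOTQ is refilled from COLDQ, ending
 * the test period of any cold pages passed over on the way.
 */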
static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we meet our target,
	 * stop at a hot page so that no cold page in its test period
	 * has a larger recency than any hot page.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

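/*
 * The cold hand (HANDcold in the paper): the source of eviction
 * candidates for the page daemon.  Each call rotates NEWQ, lets the hot
 * hand run, and then examines the front of COLDQ (or, with LISTQ, a page
 * parked on LISTQ).  A referenced cold page either starts its test
 * period or, if already in one, is promoted to hot; an unreferenced page
 * is returned to the caller, after being recorded in the non-resident
 * hash if it was still in its test period.
 */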
static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

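/*
 * uvmpdpol_*(): the hooks through which the rest of UVM drives the
 * policy.  pageactivate() marks a page referenced, enqueueing it with
 * PQ_INITIALREF if it is not yet managed; pagedeactivate() merely clears
 * the software reference bit, as CLOCK-Pro keeps no separate inactive
 * list to move the page onto.
 */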
void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~PQ_SPECULATIVE;
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

boolean_t
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

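/*
 * Called repeatedly by the page daemon to pick eviction candidates.
 * Each call advances the cold hand once; the number of pages examined
 * per uvmpdpol_scaninit() pass is bounded by s_npages.
 */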
struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}

static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

boolean_t
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return TRUE;
	}
	return FALSE;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h> /* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define INITCOUNT() \
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define COUNT(pg) \
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define PRINTCOUNT(name) \
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */