/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.30 2020/01/01 14:33:48 ad Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*-
 * Copyright (c) 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */
#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.30 2020/01/01 14:33:48 ad Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */

/*
 * per-CPU queue of pending page status changes.  128 entries makes for a
 * 1kB queue on _LP64 and has been found to be a reasonable compromise that
 * keeps lock contention events and wait times low, while not using too much
 * memory nor allowing global state to fall too far behind.
 */
#if !defined(CLOCK_PDQ_SIZE)
#define	CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */

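/*
 * Page queue flag bits private to this policy, kept in pg->pqflags.
 * PQ_ACTIVE and PQ_INACTIVE record which queue the page currently sits
 * on.  PQ_TIME stores hardclock_ticks with the low six bits masked off,
 * sampled at the last activation; uvmpdpol_pageactivate() uses it to
 * avoid re-queueing a page that was activated within roughly the last
 * second.  The low-order bits left free here are presumably shared with
 * the PQ_INTENT_* flags used by the rest of the pdpolicy code.
 */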
#define	PQ_TIME		0xffffffc0	/* time of last activation */
#define	PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000020	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	kmutex_t lock;			/* lock on state */
					/* <= compiler pads here */
	struct pglist s_activeq		/* allocated pages, in use */
	    __aligned(COHERENCY_UNIT);
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;
	int s_inactive;
	int s_inactarg;
	struct uvm_pctparam s_anonmin;
	struct uvm_pctparam s_filemin;
	struct uvm_pctparam s_execmin;
	struct uvm_pctparam s_anonmax;
	struct uvm_pctparam s_filemax;
	struct uvm_pctparam s_execmax;
	struct uvm_pctparam s_inactivepct;
};

struct uvmpdpol_scanstate {
	bool ss_anonreact, ss_filereact, ss_execreact;
	struct vm_page ss_marker;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
static struct uvm_cpu	*uvmpdpol_flush(void);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

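/*
 * clock_tune: recompute the inactive queue target as a percentage
 * (s_inactivepct) of the pages on the active and inactive queues, and
 * keep it above the free target.  Expected to be called with the global
 * state lock held (see uvmpdpol_tune()).
 */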
static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}

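/*
 * uvmpdpol_scaninit: prepare for a pagedaemon scan pass.  Decide which
 * page types (anon/file/exec) should be reactivated rather than reclaimed
 * so that usage stays within the configured minimum and maximum limits,
 * then insert the scan marker at the head of the inactive queue.
 */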
void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;
	int64_t freepg, anonpg, filepg, execpg;

	/*
	 * decide which types of pages we want to reactivate instead of freeing
	 * to keep usage within the minimum and maximum usage limits.
	 */

	cpu_count_sync_all();
	freepg = uvm_availmem();
	anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
	filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);

	mutex_enter(&s->lock);
	t = s->s_active + s->s_inactive + freepg;
	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;
	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
	ss->ss_marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

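/*
 * uvmpdpol_scanfini: end a scan pass by removing the scan marker from the
 * inactive queue.
 */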
void
uvmpdpol_scanfini(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;

	mutex_enter(&s->lock);
	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

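/*
 * uvmpdpol_selectvictim: choose the next inactive page to consider for
 * reclaim.  Returns the page with its owner (object or anon) locked and
 * that lock passed back via plock, or NULL once the end of the inactive
 * queue is reached.  Pages that must be kept to honour the usage
 * minimums, and pages found to have been referenced, are reactivated
 * and skipped.
 */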
struct vm_page *
uvmpdpol_selectvictim(kmutex_t **plock)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	kmutex_t *lock;

	mutex_enter(&s->lock);
	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
		if (pg == NULL) {
			break;
		}
		KASSERT((pg->flags & PG_MARKER) == 0);
		uvmexp.pdscans++;

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&pg->interlock);
		if (uvmpdpol_pagerealize_locked(pg)) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
		    pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
		    &ss->ss_marker, pdqueue);

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */
		anon = pg->uanon;
		uobj = pg->uobject;
		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
		 *
		 * with the page interlock held, we can drop s->lock, which
		 * could otherwise serve as a barrier to us getting the
		 * object locked, because the owner of the object's lock may
		 * be blocked on s->lock (i.e. a deadlock).
		 *
		 * whatever happens, uvmpd_trylockowner() will release the
		 * interlock.  with the interlock dropped we can then
		 * re-acquire our own lock.  the order is:
		 *
		 *	object -> pdpol -> interlock.
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * move referenced pages back to active queue and skip to
		 * next page.
		 */
		if (pmap_is_referenced(pg)) {
			mutex_enter(&pg->interlock);
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			uvmexp.pdreact++;
			mutex_exit(lock);
			continue;
		}

		/* we have a potential victim. */
		*plock = lock;
		break;
	}
	mutex_exit(&s->lock);
	return pg;
}

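/*
 * uvmpdpol_balancequeue: scan the active queue, deactivating pages until
 * the inactive target is met; if there is a shortage of swap slots, also
 * try to free swap space from swap-backed pages along the way.
 */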
void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int inactive_shortage;
	struct vm_page *p, marker;
	kmutex_t *lock;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */

	memset(&marker, 0, sizeof(marker));
	marker.flags = PG_MARKER;

	mutex_enter(&s->lock);
	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
	for (;;) {
		inactive_shortage =
		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
		if (inactive_shortage <= 0 && swap_shortage <= 0) {
			break;
		}
		p = TAILQ_NEXT(&marker, pdqueue);
		if (p == NULL) {
			break;
		}
		KASSERT((p->flags & PG_MARKER) == 0);

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&p->interlock);
		if (uvmpdpol_pagerealize_locked(p)) {
			mutex_exit(&p->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
		    pdqueue);

		/*
		 * try to lock the object that owns the page.  see comments
		 * in uvmpdpol_selectvictim().
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(p);
		/* p->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * if there's a shortage of swap slots, try to free it.
		 */
		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
		    (p->flags & PG_BUSY) == 0) {
			if (uvmpd_dropswap(p)) {
				swap_shortage--;
			}
		}

		/*
		 * if there's a shortage of inactive pages, deactivate.
		 */
		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			mutex_enter(&p->interlock);
			uvmpdpol_pagedeactivate_locked(p);
			mutex_exit(&p->interlock);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		mutex_exit(lock);
	}
	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
	mutex_exit(&s->lock);
}

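/*
 * uvmpdpol_pagedeactivate_locked: move a page to the tail of the inactive
 * queue.  Called with both the global state lock and the page interlock
 * held.
 */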
static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
		pdpol_state.s_inactive++;
	}
	pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_INACTIVE;
}

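/*
 * uvmpdpol_pagedeactivate: note the intent to deactivate a page.  The
 * intent is realized later via uvmpdpol_pagerealize().
 */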
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * we have to clear the reference bit now, as when it comes time to
	 * realize the intent we won't have the object locked any more.
	 */
	pmap_clear_reference(pg);
	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

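/*
 * uvmpdpol_pageactivate_locked: move a page to the tail of the active
 * queue and record the activation time.  Called with both the global
 * state lock and the page interlock held.
 */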
static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	uvmpdpol_pagedequeue_locked(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
	pdpol_state.s_active++;
	pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_ACTIVE |
	    (hardclock_ticks & PQ_TIME);
}

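/*
 * uvmpdpol_pageactivate: note the intent to activate a page, unless it is
 * already active, has no intent pending, and was activated within roughly
 * the last second (in which case re-queueing it would be pointless).
 */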
void
uvmpdpol_pageactivate(struct vm_page *pg)
{
	uint32_t pqflags;

	KASSERT(uvm_page_owner_locked_p(pg));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * if there is any intent set on the page, or the page is not
	 * active, or the page was activated in the "distant" past, then
	 * it needs to be activated anew.
	 */
	pqflags = pg->pqflags;
	if ((pqflags & PQ_INTENT_SET) != 0 ||
	    (pqflags & PQ_ACTIVE) == 0 ||
	    ((hardclock_ticks & PQ_TIME) - (pqflags & PQ_TIME)) > hz) {
		uvmpdpol_set_intent(pg, PQ_INTENT_A);
	}
}

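/*
 * uvmpdpol_pagedequeue_locked: remove a page from whichever queue it is
 * on.  Called with both the global state lock and the page interlock held.
 */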
static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
	pg->pqflags &= PQ_INTENT_QUEUED;
}

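/*
 * uvmpdpol_pagedequeue: note the intent to remove a page from the paging
 * queues.
 */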
void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

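/*
 * uvmpdpol_pageenqueue: note the intent to put a page onto a queue; this
 * policy realizes the intent the same way as an activation.
 */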
void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_E);
}

void
uvmpdpol_anfree(struct vm_anon *an)
{
}

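/*
 * uvmpdpol_pageisqueued_p: return true if the page is, or is about to be,
 * on one of the paging queues.
 */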
bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/*
	 * if there's an intent set, we have to consider it.  otherwise,
	 * return the actual state.  we may be called unlocked for the
	 * purpose of assertions, which is safe due to the page lifecycle.
	 */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
	} else {
		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
	}
}

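/*
 * uvmpdpol_estimatepageable: report the current number of active and
 * inactive pages.
 */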
void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	if (active) {
		*active = pdpol_state.s_active;
	}
	if (inactive) {
		*inactive = pdpol_state.s_inactive;
	}
	mutex_exit(&s->lock);
}

#if !defined(PDSIM)
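/*
 * min_check: constraint hook for the anonmin/filemin/execmin percentages;
 * reject a new value if the three minimums would then total more than 95%.
 */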
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

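/*
 * uvmpdpol_init: set up the global clock state: the lock, the two page
 * queues, and the default values for the tunables.
 */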
void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

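/*
 * uvmpdpol_init_cpu: allocate the per-CPU queue of pending page status
 * changes.  The queue fills downward from CLOCK_PDQ_SIZE, so head ==
 * tail == CLOCK_PDQ_SIZE means the queue is empty.
 */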
void
uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
{

	ucpu->pdq =
	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
	ucpu->pdqhead = CLOCK_PDQ_SIZE;
	ucpu->pdqtail = CLOCK_PDQ_SIZE;
}

void
uvmpdpol_reinit(void)
{
}

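/*
 * uvmpdpol_needsscan_p: return true if the inactive queue is below its
 * target and a scan is therefore needed.
 */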
bool
uvmpdpol_needsscan_p(void)
{

	/*
	 * this must be an unlocked check: can be called from interrupt.
	 */
	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

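/*
 * uvmpdpol_tune: recompute the inactive queue target under the global
 * state lock.
 */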
void
uvmpdpol_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	clock_tune();
	mutex_exit(&s->lock);
}

/*
 * uvmpdpol_pagerealize_locked: take the intended state set on a page and
 * make it real.  return true if any work was done.
 */
static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

/*
 * uvmpdpol_flush: return the current uvm_cpu with all of its pending
 * updates flushed to the global queues.  this routine may block, and
 * so can switch cpu.  the idea is to empty the queue on whatever cpu
 * we finally end up on.
 */
static struct uvm_cpu *
uvmpdpol_flush(void)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	mutex_enter(&s->lock);
	for (;;) {
		/*
		 * prefer scanning forwards (even though mutex_enter() is
		 * serializing) so as to not defeat any prefetch logic in
		 * the CPU.  that means elsewhere enqueuing backwards, like
		 * a stack, but not so important there as pages are being
		 * added singly.
		 *
		 * prefetch the next "struct vm_page" while working on the
		 * current one.  this has a measurable and very positive
		 * effect in reducing the amount of time spent here under
		 * the global lock.
		 */
		ucpu = curcpu()->ci_data.cpu_uvm;
		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
			break;
		}
		pg = ucpu->pdq[ucpu->pdqhead++];
		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
		}
		mutex_enter(&pg->interlock);
		pg->pqflags &= ~PQ_INTENT_QUEUED;
		(void)uvmpdpol_pagerealize_locked(pg);
		mutex_exit(&pg->interlock);
	}
	mutex_exit(&s->lock);
	return ucpu;
}

/*
 * uvmpdpol_pagerealize: realize any intent set on the page.  in this
 * implementation, that means putting the page on a per-CPU queue to be
 * dealt with later.
 */
void
uvmpdpol_pagerealize(struct vm_page *pg)
{
	struct uvm_cpu *ucpu;

	/*
	 * drain the per-CPU queue if full, then enter the page.
	 */
	kpreempt_disable();
	ucpu = curcpu()->ci_data.cpu_uvm;
	if (__predict_false(ucpu->pdqhead == 0)) {
		ucpu = uvmpdpol_flush();
	}
	ucpu->pdq[--(ucpu->pdqhead)] = pg;
	kpreempt_enable();
}

/*
 * uvmpdpol_idle: called from the system idle loop.  periodically purge any
 * pending updates back to the global queues.
 */
void
uvmpdpol_idle(struct uvm_cpu *ucpu)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	/*
	 * if no pages in the queue, we have nothing to do.
	 */
	if (ucpu->pdqhead == ucpu->pdqtail) {
		ucpu->pdqtime = hardclock_ticks;
		return;
	}

	/*
	 * don't do this more than ~8 times a second as it would needlessly
	 * exert pressure.
	 */
	if (hardclock_ticks - ucpu->pdqtime < (hz >> 3)) {
		return;
	}

	/*
	 * the idle LWP can't block, so we have to try for the lock.  if we
	 * get it, purge the per-CPU pending update queue.  continually
	 * check for a pending resched: in that case exit immediately.
	 */
	if (mutex_tryenter(&s->lock)) {
		while (ucpu->pdqhead != ucpu->pdqtail) {
			pg = ucpu->pdq[ucpu->pdqhead];
			if (!mutex_tryenter(&pg->interlock)) {
				break;
			}
			ucpu->pdqhead++;
			pg->pqflags &= ~PQ_INTENT_QUEUED;
			(void)uvmpdpol_pagerealize_locked(pg);
			mutex_exit(&pg->interlock);
			if (curcpu()->ci_want_resched) {
				break;
			}
		}
		if (ucpu->pdqhead == ucpu->pdqtail) {
			ucpu->pdqtime = hardclock_ticks;
		}
		mutex_exit(&s->lock);
	}
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

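/*
 * uvmpdpol_sysctlsetup: attach sysctl nodes for the policy's percentage
 * tunables.
 */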
void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Percentage of inactive queue of "
	    "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */