/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.38 2020/06/11 19:20:47 ad Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*-
 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.38 2020/06/11 19:20:47 ad Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */

/*
 * per-CPU queue of pending page status changes.  128 entries makes for a
 * 1kB queue on _LP64 and has been found to be a reasonable compromise that
 * keeps lock contention events and wait times low, while not using too much
 * memory nor allowing global state to fall too far behind.
 */
#if !defined(CLOCK_PDQ_SIZE)
#define	CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */

#define	PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000020	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	kmutex_t lock;			/* lock on state */
					/* <= compiler pads here */
	struct pglist s_activeq		/* allocated pages, in use */
	    __aligned(COHERENCY_UNIT);
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;
	int s_inactive;
	int s_inactarg;
	struct uvm_pctparam s_anonmin;
	struct uvm_pctparam s_filemin;
	struct uvm_pctparam s_execmin;
	struct uvm_pctparam s_anonmax;
	struct uvm_pctparam s_filemax;
	struct uvm_pctparam s_execmax;
	struct uvm_pctparam s_inactivepct;
};

struct uvmpdpol_scanstate {
	bool ss_anonreact, ss_filereact, ss_execreact;
	struct vm_page ss_marker;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
static struct uvm_cpu *uvmpdpol_flush(void);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

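/*
 * clock_tune: recompute the inactive target as a percentage of all pages
 * on the active and inactive queues, keeping it above the free target.
 */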
static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}

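/*
 * uvmpdpol_scaninit: prepare for a pageout scan: decide which page types
 * should be reactivated rather than freed, and place the scan marker at
 * the head of the inactive queue.
 */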
void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;
	int64_t freepg, anonpg, filepg, execpg;

	/*
	 * decide which types of pages we want to reactivate instead of freeing
	 * to keep usage within the minimum and maximum usage limits.
	 */

	cpu_count_sync_all();
	freepg = uvm_availmem(false);
	anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
	filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);

	mutex_enter(&s->lock);
	t = s->s_active + s->s_inactive + freepg;
	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;
	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
	ss->ss_marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

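/*
 * uvmpdpol_scanfini: remove the scan marker from the inactive queue now
 * that the scan is complete.
 */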
void
uvmpdpol_scanfini(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;

	mutex_enter(&s->lock);
	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

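/*
 * uvmpdpol_selectvictim: scan forward from the marker on the inactive
 * queue and return the next candidate page for pageout, with the lock
 * on its owner held and returned via plock.  referenced pages and pages
 * protected by the minimum usage thresholds are reactivated instead.
 * returns NULL once the inactive queue has been exhausted.
 */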
struct vm_page *
uvmpdpol_selectvictim(krwlock_t **plock)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	krwlock_t *lock;

	mutex_enter(&s->lock);
	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
		if (pg == NULL) {
			break;
		}
		KASSERT((pg->flags & PG_MARKER) == 0);
		uvmexp.pdscans++;

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux,
		 * deal with it and retry.
		 */
		mutex_enter(&pg->interlock);
		if (uvmpdpol_pagerealize_locked(pg)) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
		    pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
		    &ss->ss_marker, pdqueue);

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */
		anon = pg->uanon;
		uobj = pg->uobject;
		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
		 *
		 * with the page interlock held, we can drop s->lock, which
		 * could otherwise serve as a barrier to us getting the
		 * object locked, because the owner of the object's lock may
		 * be blocked on s->lock (i.e. a deadlock).
		 *
		 * whatever happens, uvmpd_trylockowner() will release the
		 * interlock.  with the interlock dropped we can then
		 * re-acquire our own lock.  the order is:
		 *
		 *	object -> pdpol -> interlock.
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * move referenced pages back to active queue and skip to
		 * next page.
		 */
		if (pmap_is_referenced(pg)) {
			mutex_enter(&pg->interlock);
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			uvmexp.pdreact++;
			rw_exit(lock);
			continue;
		}

		/* we have a potential victim. */
		*plock = lock;
		break;
	}
	mutex_exit(&s->lock);
	return pg;
}

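/*
 * uvmpdpol_balancequeue: deactivate pages from the active queue until the
 * inactive target is met, and drop swap slots from swap-backed pages while
 * there is a swap shortage.
 */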
void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int inactive_shortage;
	struct vm_page *p, marker;
	krwlock_t *lock;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */

	memset(&marker, 0, sizeof(marker));
	marker.flags = PG_MARKER;

	mutex_enter(&s->lock);
	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
	for (;;) {
		inactive_shortage =
		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
		if (inactive_shortage <= 0 && swap_shortage <= 0) {
			break;
		}
		p = TAILQ_NEXT(&marker, pdqueue);
		if (p == NULL) {
			break;
		}
		KASSERT((p->flags & PG_MARKER) == 0);

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux,
		 * deal with it and retry.
		 */
		mutex_enter(&p->interlock);
		if (uvmpdpol_pagerealize_locked(p)) {
			mutex_exit(&p->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
		    pdqueue);

		/*
		 * try to lock the object that owns the page.  see comments
		 * in uvmpdpol_selectvictim().
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(p);
		/* p->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * if there's a shortage of swap slots, try to free it.
		 */
		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
		    (p->flags & PG_BUSY) == 0) {
			if (uvmpd_dropswap(p)) {
				swap_shortage--;
			}
		}

		/*
		 * if there's a shortage of inactive pages, deactivate.
		 */
		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			mutex_enter(&p->interlock);
			uvmpdpol_pagedeactivate_locked(p);
			mutex_exit(&p->interlock);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		rw_exit(lock);
	}
	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
	mutex_exit(&s->lock);
}

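/*
 * uvmpdpol_pagedeactivate_locked: move a page to the tail of the inactive
 * queue, removing it from the active queue if necessary.  called with the
 * global state lock and the page interlock held.
 */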
static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
		pdpol_state.s_inactive++;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_INACTIVE;
}

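/*
 * uvmpdpol_pagedeactivate: note the intent to deactivate the page.  the
 * move onto the inactive queue happens later, when the intent is realized.
 */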
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * we have to clear the reference bit now, as when it comes time to
	 * realize the intent we won't have the object locked any more.
	 */
	pmap_clear_reference(pg);
	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

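/*
 * uvmpdpol_pageactivate_locked: move a page to the tail of the active
 * queue.  called with the global state lock and the page interlock held.
 */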
static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	uvmpdpol_pagedequeue_locked(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
	pdpol_state.s_active++;
	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_ACTIVE;
}

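/*
 * uvmpdpol_pageactivate: note the intent to activate the page, to be
 * realized later.
 */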
void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_A);
}

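/*
 * uvmpdpol_pagedequeue_locked: remove a page from whichever queue it is
 * on.  called with the global state lock and the page interlock held.
 */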
static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
}

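/*
 * uvmpdpol_pagedequeue: note the intent to dequeue the page, to be
 * realized later.
 */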
void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

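/*
 * uvmpdpol_pageenqueue: note the intent to enqueue the page; when the
 * intent is realized the page is placed on the active queue.
 */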
void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_E);
}

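/*
 * uvmpdpol_anfree: nothing to do for this policy.
 */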
void
uvmpdpol_anfree(struct vm_anon *an)
{
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/*
	 * if there's an intent set, we have to consider it.  otherwise,
	 * return the actual state.  we may be called unlocked for the
	 * purpose of assertions, which is safe due to the page lifecycle.
	 */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
	} else {
		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
	}
}

bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/* consider intent in preference to actual state. */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		pqflags &= PQ_INTENT_MASK;
		return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
	} else {
		/*
		 * TODO: Enabling this may be too much of a big hammer,
		 * since we do get useful information from activations.
		 * Think about it more and maybe come up with a heuristic
		 * or something.
		 *
		 * return (pqflags & PQ_ACTIVE) == 0;
		 */
		return true;
	}
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	/*
	 * Don't take any locks here.  This can be called from DDB, and in
	 * any case the numbers are stale the instant the lock is dropped,
	 * so it just doesn't matter.
	 */
	if (active) {
		*active = s->s_active;
	}
	if (inactive) {
		*inactive = s->s_inactive;
	}
}

#if !defined(PDSIM)
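/*
 * min_check: sysctl validation hook for the anon/file/exec minimums:
 * their combined total may not exceed 95%.
 */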
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

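/*
 * uvmpdpol_init: set up the global state: the queues, the lock, and the
 * default tuning percentages.
 */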
void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

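/*
 * uvmpdpol_init_cpu: allocate the per-CPU pending update queue.  the queue
 * fills downwards from the tail, so head == tail == CLOCK_PDQ_SIZE means
 * the queue is empty.
 */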
void
uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
{

	ucpu->pdq =
	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
	ucpu->pdqhead = CLOCK_PDQ_SIZE;
	ucpu->pdqtail = CLOCK_PDQ_SIZE;
}

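/*
 * uvmpdpol_reinit: nothing to do for this policy.
 */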
void
uvmpdpol_reinit(void)
{
}

bool
uvmpdpol_needsscan_p(void)
{

	/*
	 * this must be an unlocked check: can be called from interrupt.
	 */
	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

void
uvmpdpol_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	clock_tune();
	mutex_exit(&s->lock);
}

/*
 * uvmpdpol_pagerealize_locked: take the intended state set on a page and
 * make it real.  return true if any work was done.
 */
static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

/*
 * uvmpdpol_flush: return the current uvm_cpu with all of its pending
 * updates flushed to the global queues.  this routine may block, and
 * so can switch cpu.  the idea is to empty the queue on whatever cpu
 * we finally end up on.
 */
static struct uvm_cpu *
uvmpdpol_flush(void)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	mutex_enter(&s->lock);
	for (;;) {
		/*
		 * prefer scanning forwards (even though mutex_enter() is
		 * serializing) so as to not defeat any prefetch logic in
		 * the CPU.  that means enqueuing backwards elsewhere, like
		 * a stack, but that matters less there since pages are
		 * added one at a time.
		 *
		 * prefetch the next "struct vm_page" while working on the
		 * current one.  this has a measurable and very positive
		 * effect in reducing the amount of time spent here under
		 * the global lock.
		 */
		ucpu = curcpu()->ci_data.cpu_uvm;
		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
			break;
		}
		pg = ucpu->pdq[ucpu->pdqhead++];
		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
		}
		mutex_enter(&pg->interlock);
		pg->pqflags &= ~PQ_INTENT_QUEUED;
		(void)uvmpdpol_pagerealize_locked(pg);
		mutex_exit(&pg->interlock);
	}
	mutex_exit(&s->lock);
	return ucpu;
}

/*
 * uvmpdpol_pagerealize: realize any intent set on the page.  in this
 * implementation, that means putting the page on a per-CPU queue to be
 * dealt with later.
 */
void
uvmpdpol_pagerealize(struct vm_page *pg)
{
	struct uvm_cpu *ucpu;

	/*
	 * drain the per-CPU queue if full, then enqueue the page.
	 */
	kpreempt_disable();
	ucpu = curcpu()->ci_data.cpu_uvm;
	if (__predict_false(ucpu->pdqhead == 0)) {
		ucpu = uvmpdpol_flush();
	}
	ucpu->pdq[--(ucpu->pdqhead)] = pg;
	kpreempt_enable();
}

/*
 * uvmpdpol_idle: called from the system idle loop.  periodically purge any
 * pending updates back to the global queues.
 */
void
uvmpdpol_idle(struct uvm_cpu *ucpu)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	/*
	 * if no pages in the queue, we have nothing to do.
	 */
	if (ucpu->pdqhead == ucpu->pdqtail) {
		ucpu->pdqtime = getticks();
		return;
	}

	/*
	 * don't do this more than ~8 times a second as it would needlessly
	 * exert pressure.
	 */
	if (getticks() - ucpu->pdqtime < (hz >> 3)) {
		return;
	}

	/*
	 * the idle LWP can't block, so we have to try for the lock.  if we
	 * get it, purge the per-CPU pending update queue.  continually
	 * check for a pending resched: in that case exit immediately.
	 */
	if (mutex_tryenter(&s->lock)) {
		while (ucpu->pdqhead != ucpu->pdqtail) {
			pg = ucpu->pdq[ucpu->pdqhead];
			if (!mutex_tryenter(&pg->interlock)) {
				break;
			}
			ucpu->pdqhead++;
			pg->pqflags &= ~PQ_INTENT_QUEUED;
			(void)uvmpdpol_pagerealize_locked(pg);
			mutex_exit(&pg->interlock);
			if (curcpu()->ci_want_resched) {
				break;
			}
		}
		if (ucpu->pdqhead == ucpu->pdqtail) {
			ucpu->pdqtime = getticks();
		}
		mutex_exit(&s->lock);
	}
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Percentage of inactive queue of "
	    "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */