/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.23 2019/12/27 13:13:17 ad Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.23 2019/12/27 13:13:17 ad Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */

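/*
 * per-page queue state lives in pg->pqflags: the low two bits record
 * which queue (if any) the page is on, and the remaining bits hold the
 * hardclock_ticks value stamped at the page's last activation.
 */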
#define PQ_TIME		0xfffffffc	/* time of last activation */
#define PQ_INACTIVE	0x00000001	/* page is in inactive list */
#define PQ_ACTIVE	0x00000002	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
        kmutex_t lock;			/* lock on state */
        				/* <= compiler pads here */
        struct pglist s_activeq		/* allocated pages, in use */
            __aligned(COHERENCY_UNIT);
        struct pglist s_inactiveq;	/* pages between the clock hands */
        int s_active;
        int s_inactive;
        int s_inactarg;
        struct uvm_pctparam s_anonmin;
        struct uvm_pctparam s_filemin;
        struct uvm_pctparam s_execmin;
        struct uvm_pctparam s_anonmax;
        struct uvm_pctparam s_filemax;
        struct uvm_pctparam s_execmax;
        struct uvm_pctparam s_inactivepct;
};

struct uvmpdpol_scanstate {
        bool ss_first;
        bool ss_anonreact, ss_filereact, ss_execreact;
        struct vm_page *ss_nextpg;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

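/*
 * clock_tune: recompute the inactive queue target as a percentage
 * (s_inactivepct) of all pages known to the policy, keeping it
 * strictly above the free page target.
 */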
static void
clock_tune(void)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
            s->s_active + s->s_inactive);
        if (s->s_inactarg <= uvmexp.freetarg) {
                s->s_inactarg = uvmexp.freetarg + 1;
        }
}

void
uvmpdpol_scaninit(void)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;
        struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
        int t;
        bool anonunder, fileunder, execunder;
        bool anonover, fileover, execover;
        bool anonreact, filereact, execreact;
        int64_t freepg, anonpg, filepg, execpg;

        /*
         * decide which types of pages we want to reactivate instead of
         * freeing, to keep usage within the minimum and maximum usage
         * limits.
         */

        cpu_count_sync_all();
        freepg = uvm_free();
        anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
        filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
        execpg = cpu_count_get(CPU_COUNT_EXECPAGES);

        mutex_enter(&s->lock);
        t = s->s_active + s->s_inactive + freepg;
        anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
        fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
        execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
        anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
        fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
        execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
        anonreact = anonunder || (!anonover && (fileover || execover));
        filereact = fileunder || (!fileover && (anonover || execover));
        execreact = execunder || (!execover && (anonover || fileover));
        if (filereact && execreact && (anonreact || uvm_swapisfull())) {
                anonreact = filereact = execreact = false;
        }
        ss->ss_anonreact = anonreact;
        ss->ss_filereact = filereact;
        ss->ss_execreact = execreact;

        ss->ss_first = true;
        mutex_exit(&s->lock);
}

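/*
 * uvmpdpol_selectvictim: scan the inactive queue from where the last
 * scan left off and return the next candidate page for eviction, with
 * the owner's lock held and recorded in *plock.  returns NULL once the
 * inactive queue is exhausted.  pages found referenced, or whose reuse
 * would push a usage type below its minimum, are reactivated instead
 * of being returned.
 */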
struct vm_page *
uvmpdpol_selectvictim(kmutex_t **plock)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;
        struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
        struct vm_page *pg;
        kmutex_t *lock;

        mutex_enter(&s->lock);
        while (/* CONSTCOND */ 1) {
                struct vm_anon *anon;
                struct uvm_object *uobj;

                if (ss->ss_first) {
                        pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
                        ss->ss_first = false;
                } else {
                        pg = ss->ss_nextpg;
                        if (pg != NULL && (pg->pqflags & PQ_INACTIVE) == 0) {
                                pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
                        }
                }
                if (pg == NULL) {
                        break;
                }
                ss->ss_nextpg = TAILQ_NEXT(pg, pdqueue);
                uvmexp.pdscans++;

                /*
                 * acquire interlock to stabilize page identity.
                 * if we have caught the page in a state of flux
                 * and it should be dequeued, do it now and then
                 * move on to the next.
                 */
                mutex_enter(&pg->interlock);
                if ((pg->uobject == NULL && pg->uanon == NULL) ||
                    pg->wire_count > 0) {
                        mutex_exit(&pg->interlock);
                        uvmpdpol_pagedequeue_locked(pg);
                        continue;
                }

                /*
                 * enforce the minimum thresholds on different
                 * types of memory usage.  if reusing the current
                 * page would reduce that type of usage below its
                 * minimum, reactivate the page instead and move
                 * on to the next page.
                 */
                anon = pg->uanon;
                uobj = pg->uobject;
                if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
                        mutex_exit(&pg->interlock);
                        uvmpdpol_pageactivate_locked(pg);
                        PDPOL_EVCNT_INCR(reactexec);
                        continue;
                }
                if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
                    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
                        mutex_exit(&pg->interlock);
                        uvmpdpol_pageactivate_locked(pg);
                        PDPOL_EVCNT_INCR(reactfile);
                        continue;
                }
                if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
                        mutex_exit(&pg->interlock);
                        uvmpdpol_pageactivate_locked(pg);
                        PDPOL_EVCNT_INCR(reactanon);
                        continue;
                }

                /*
                 * try to lock the object that owns the page.
                 *
                 * with the page interlock held, we can drop s->lock, which
                 * could otherwise serve as a barrier to us getting the
                 * object locked, because the owner of the object's lock may
                 * be blocked on s->lock (i.e. a deadlock).
                 *
                 * whatever happens, uvmpd_trylockowner() will release the
                 * interlock.  with the interlock dropped we can then
                 * re-acquire our own lock.  the order is:
                 *
                 *	object -> pdpol -> interlock.
                 */
                mutex_exit(&s->lock);
                lock = uvmpd_trylockowner(pg);
                /* pg->interlock now released */
                mutex_enter(&s->lock);
                if (lock == NULL) {
                        /* didn't get it - try the next page. */
                        continue;
                }

                /*
                 * move referenced pages back to the active queue and skip
                 * to the next page.
                 */
                if (pmap_is_referenced(pg)) {
                        uvmpdpol_pageactivate_locked(pg);
                        uvmexp.pdreact++;
                        mutex_exit(lock);
                        continue;
                }

                /* we have a potential victim. */
                *plock = lock;
                break;
        }
        mutex_exit(&s->lock);
        return pg;
}

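/*
 * uvmpdpol_balancequeue: move pages from the head of the active queue
 * to the inactive queue until the inactive target is met; while swap
 * space is short, also try to free swap slots of swap-backed pages
 * encountered along the way.
 */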
void
uvmpdpol_balancequeue(int swap_shortage)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;
        int inactive_shortage;
        struct vm_page *p, *nextpg;
        kmutex_t *lock;

        /*
         * we have done the scan to get free pages.  now we work on meeting
         * our inactive target.
         */

        mutex_enter(&s->lock);
        inactive_shortage = pdpol_state.s_inactarg - pdpol_state.s_inactive;
        for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
            p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
            p = nextpg) {
                nextpg = TAILQ_NEXT(p, pdqueue);

                /*
                 * if there's a shortage of swap slots, try to free this
                 * page's swap space.
                 */

                if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0) {
                        mutex_enter(&p->interlock);
                        mutex_exit(&s->lock);
                        if (uvmpd_trydropswap(p)) {
                                swap_shortage--;
                        }
                        /* p->interlock now released */
                        mutex_enter(&s->lock);
                }

                /*
                 * if there's a shortage of inactive pages, deactivate.
                 */

                if (inactive_shortage <= 0) {
                        continue;
                }

                /*
                 * acquire interlock to stabilize page identity.
                 * if we have caught the page in a state of flux
                 * and it should be dequeued, do it now and then
                 * move on to the next.
                 */
                mutex_enter(&p->interlock);
                if ((p->uobject == NULL && p->uanon == NULL) ||
                    p->wire_count > 0) {
                        mutex_exit(&p->interlock);
                        uvmpdpol_pagedequeue_locked(p);
                        continue;
                }
                mutex_exit(&s->lock);
                lock = uvmpd_trylockowner(p);
                /* p->interlock now released */
                mutex_enter(&s->lock);
                if (lock != NULL) {
                        uvmpdpol_pagedeactivate_locked(p);
                        uvmexp.pddeact++;
                        inactive_shortage--;
                        mutex_exit(lock);
                }
        }
        mutex_exit(&s->lock);
}

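/*
 * uvmpdpol_pagedeactivate_locked: with the policy lock held, remove
 * the page from the active queue if it is there and, if it is not
 * already inactive, clear its pmap reference bit and append it to the
 * tail of the inactive queue.
 */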
static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{

        KASSERT(uvm_page_locked_p(pg));

        if (pg->pqflags & PQ_ACTIVE) {
                TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
                pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
                KASSERT(pdpol_state.s_active > 0);
                pdpol_state.s_active--;
        }
        if ((pg->pqflags & PQ_INACTIVE) == 0) {
                KASSERT(pg->wire_count == 0);
                pmap_clear_reference(pg);
                TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
                pg->pqflags |= PQ_INACTIVE;
                pdpol_state.s_inactive++;
        }
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        mutex_enter(&s->lock);
        uvmpdpol_pagedeactivate_locked(pg);
        mutex_exit(&s->lock);
}

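/*
 * uvmpdpol_pageactivate_locked: with the policy lock held, move the
 * page to the tail of the active queue and stamp the current
 * hardclock_ticks into the PQ_TIME bits of pqflags.
 */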
static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{

        uvmpdpol_pagedequeue_locked(pg);
        TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
        pg->pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
        pdpol_state.s_active++;
}

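/*
 * uvmpdpol_pageactivate: activate a page.  the global lock is only
 * taken if the page is not already active, or if its last activation
 * (per the PQ_TIME stamp) was at least hz ticks ago.
 */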
void
uvmpdpol_pageactivate(struct vm_page *pg)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        /* Safety: PQ_ACTIVE clear also tells us if it is not enqueued. */
        if ((pg->pqflags & PQ_ACTIVE) == 0 ||
            ((hardclock_ticks & PQ_TIME) - (pg->pqflags & PQ_TIME)) >= hz) {
                mutex_enter(&s->lock);
                uvmpdpol_pageactivate_locked(pg);
                mutex_exit(&s->lock);
        }
}

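/*
 * uvmpdpol_pagedequeue_locked: with the policy lock held, remove the
 * page from whichever queue it is currently on and adjust the
 * corresponding counter.
 */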
static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{

        if (pg->pqflags & PQ_ACTIVE) {
                TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
                pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
                KASSERT(pdpol_state.s_active > 0);
                pdpol_state.s_active--;
        } else if (pg->pqflags & PQ_INACTIVE) {
                TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
                pg->pqflags &= ~PQ_INACTIVE;
                KASSERT(pdpol_state.s_inactive > 0);
                pdpol_state.s_inactive--;
        }
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        mutex_enter(&s->lock);
        uvmpdpol_pagedequeue_locked(pg);
        mutex_exit(&s->lock);
}

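/*
 * uvmpdpol_pageenqueue: pages enter the policy via the active queue.
 */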
void
uvmpdpol_pageenqueue(struct vm_page *pg)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        mutex_enter(&s->lock);
        uvmpdpol_pageactivate_locked(pg);
        mutex_exit(&s->lock);
}

void
uvmpdpol_anfree(struct vm_anon *an)
{
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

        /* Safe to test unlocked due to page life-cycle. */
        return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        mutex_enter(&s->lock);
        if (active) {
                *active = pdpol_state.s_active;
        }
        if (inactive) {
                *inactive = pdpol_state.s_inactive;
        }
        mutex_exit(&s->lock);
}

#if !defined(PDSIM)
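/*
 * min_check: sysctl helper.  reject an update to one of the three
 * minimum percentages if the three of them would then sum to more
 * than 95.
 */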
static int
min_check(struct uvm_pctparam *pct, int t)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;
        int total = t;

        if (pct != &s->s_anonmin) {
                total += uvm_pctparam_get(&s->s_anonmin);
        }
        if (pct != &s->s_filemin) {
                total += uvm_pctparam_get(&s->s_filemin);
        }
        if (pct != &s->s_execmin) {
                total += uvm_pctparam_get(&s->s_execmin);
        }
        if (total > 95) {
                return EINVAL;
        }
        return 0;
}
#endif /* !defined(PDSIM) */

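/*
 * uvmpdpol_init: set up the global lock, the two page queues, and the
 * default values of the policy's tunables.
 */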
void
uvmpdpol_init(void)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
        TAILQ_INIT(&s->s_activeq);
        TAILQ_INIT(&s->s_inactiveq);
        uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
        uvm_pctparam_init(&s->s_anonmin, 10, min_check);
        uvm_pctparam_init(&s->s_filemin, 10, min_check);
        uvm_pctparam_init(&s->s_execmin, 5, min_check);
        uvm_pctparam_init(&s->s_anonmax, 80, NULL);
        uvm_pctparam_init(&s->s_filemax, 50, NULL);
        uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

void
uvmpdpol_reinit(void)
{
}

bool
uvmpdpol_needsscan_p(void)
{

        /* This must be an unlocked check: can be called from interrupt. */
        return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

void
uvmpdpol_tune(void)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        mutex_enter(&s->lock);
        clock_tune();
        mutex_exit(&s->lock);
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
        struct uvmpdpol_globalstate *s = &pdpol_state;

        uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
            SYSCTL_DESCR("Percentage of physical memory reserved "
            "for anonymous application data"));
        uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
            SYSCTL_DESCR("Percentage of physical memory reserved "
            "for cached file data"));
        uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
            SYSCTL_DESCR("Percentage of physical memory reserved "
            "for cached executable data"));

        uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
            SYSCTL_DESCR("Percentage of physical memory which will "
            "be reclaimed from other usage for "
            "anonymous application data"));
        uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
            SYSCTL_DESCR("Percentage of physical memory which will "
            "be reclaimed from other usage for cached "
            "file data"));
        uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
            SYSCTL_DESCR("Percentage of physical memory which will "
            "be reclaimed from other usage for cached "
            "executable data"));

        uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
            SYSCTL_DESCR("Percentage of inactive queue of "
            "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
        /* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */