uvm_pdpolicy_clock.c revision 1.19 1 /* $NetBSD: uvm_pdpolicy_clock.c,v 1.19 2019/12/16 19:18:26 ad Exp $ */
2 /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
3
4 /*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * The Mach Operating System project at Carnegie-Mellon University.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
38 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
39 *
40 *
41 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 #if defined(PDSIM)
66
67 #include "pdsim.h"
68
69 #else /* defined(PDSIM) */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.19 2019/12/16 19:18:26 ad Exp $");
73
74 #include <sys/param.h>
75 #include <sys/proc.h>
76 #include <sys/systm.h>
77 #include <sys/kernel.h>
78
79 #include <uvm/uvm.h>
80 #include <uvm/uvm_pdpolicy.h>
81 #include <uvm/uvm_pdpolicy_impl.h>
82 #include <uvm/uvm_stat.h>
83
84 #endif /* defined(PDSIM) */
85
/*
 * Layout of vm_page::pqflags as used by this policy: the two low bits
 * say which queue the page is on, the remaining bits hold a timestamp.
 */
#define	PQ_INACTIVE	0x00000001	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000002	/* page is in active list */
#define	PQ_TIME		0xfffffffc	/* time of last activation */

/* Default value for the "inactivepct" tunable; may be overridden at build. */
#ifndef CLOCK_INACTIVEPCT
#define	CLOCK_INACTIVEPCT	33
#endif /* CLOCK_INACTIVEPCT */
93
94 struct uvmpdpol_globalstate {
95 kmutex_t lock; /* lock on state */
96 /* <= compiler pads here */
97 struct pglist s_activeq /* allocated pages, in use */
98 __aligned(COHERENCY_UNIT);
99 struct pglist s_inactiveq; /* pages between the clock hands */
100 int s_active;
101 int s_inactive;
102 int s_inactarg;
103 struct uvm_pctparam s_anonmin;
104 struct uvm_pctparam s_filemin;
105 struct uvm_pctparam s_execmin;
106 struct uvm_pctparam s_anonmax;
107 struct uvm_pctparam s_filemax;
108 struct uvm_pctparam s_execmax;
109 struct uvm_pctparam s_inactivepct;
110 };
111
/*
 * State of one victim scan, set up by uvmpdpol_scaninit() and consumed
 * by uvmpdpol_selectvictim().
 */
struct uvmpdpol_scanstate {
	bool ss_first;			/* next selection starts at queue head */
	bool ss_anonreact;		/* reactivate anonymous pages */
	bool ss_filereact;		/* reactivate file pages */
	bool ss_execreact;		/* reactivate executable pages */
	struct vm_page *ss_nextpg;	/* page to examine next */
};
117
118 static void uvmpdpol_pageactivate_locked(struct vm_page *);
119 static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
120 static void uvmpdpol_pagedequeue_locked(struct vm_page *);
121
122 static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
123 static struct uvmpdpol_scanstate pdpol_scanstate;
124
125 PDPOL_EVCNT_DEFINE(reactexec)
126 PDPOL_EVCNT_DEFINE(reactfile)
127 PDPOL_EVCNT_DEFINE(reactanon)
128
129 static void
130 clock_tune(void)
131 {
132 struct uvmpdpol_globalstate *s = &pdpol_state;
133
134 s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
135 s->s_active + s->s_inactive);
136 if (s->s_inactarg <= uvmexp.freetarg) {
137 s->s_inactarg = uvmexp.freetarg + 1;
138 }
139 }
140
141 void
142 uvmpdpol_scaninit(void)
143 {
144 struct uvmpdpol_globalstate *s = &pdpol_state;
145 struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
146 int t;
147 bool anonunder, fileunder, execunder;
148 bool anonover, fileover, execover;
149 bool anonreact, filereact, execreact;
150
151 /*
152 * decide which types of pages we want to reactivate instead of freeing
153 * to keep usage within the minimum and maximum usage limits.
154 */
155
156 mutex_enter(&s->lock);
157 t = s->s_active + s->s_inactive + uvmexp.free;
158 anonunder = uvmexp.anonpages <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
159 fileunder = uvmexp.filepages <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
160 execunder = uvmexp.execpages <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
161 anonover = uvmexp.anonpages > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
162 fileover = uvmexp.filepages > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
163 execover = uvmexp.execpages > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
164 anonreact = anonunder || (!anonover && (fileover || execover));
165 filereact = fileunder || (!fileover && (anonover || execover));
166 execreact = execunder || (!execover && (anonover || fileover));
167 if (filereact && execreact && (anonreact || uvm_swapisfull())) {
168 anonreact = filereact = execreact = false;
169 }
170 ss->ss_anonreact = anonreact;
171 ss->ss_filereact = filereact;
172 ss->ss_execreact = execreact;
173
174 ss->ss_first = true;
175 mutex_exit(&s->lock);
176 }
177
178 struct vm_page *
179 uvmpdpol_selectvictim(kmutex_t **plock)
180 {
181 struct uvmpdpol_globalstate *s = &pdpol_state;
182 struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
183 struct vm_page *pg;
184 kmutex_t *lock;
185
186 mutex_enter(&s->lock);
187 while (/* CONSTCOND */ 1) {
188 struct vm_anon *anon;
189 struct uvm_object *uobj;
190
191 if (ss->ss_first) {
192 pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
193 ss->ss_first = false;
194 } else {
195 pg = ss->ss_nextpg;
196 if (pg != NULL && (pg->pqflags & PQ_INACTIVE) == 0) {
197 pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
198 }
199 }
200 if (pg == NULL) {
201 break;
202 }
203 ss->ss_nextpg = TAILQ_NEXT(pg, pageq.queue);
204 KASSERT(pg->wire_count == 0);
205
206 uvmexp.pdscans++;
207
208 /*
209 * acquire interlock to stablize page identity.
210 * if we have caught the page in a state of flux
211 * and it should be dequeued, do it now and then
212 * move on to the next.
213 */
214 mutex_enter(&pg->interlock);
215 if ((pg->uobject == NULL && pg->uanon == NULL) ||
216 pg->wire_count > 0) {
217 mutex_exit(&pg->interlock);
218 uvmpdpol_pagedequeue_locked(pg);
219 continue;
220 }
221
222 /*
223 * enforce the minimum thresholds on different
224 * types of memory usage. if reusing the current
225 * page would reduce that type of usage below its
226 * minimum, reactivate the page instead and move
227 * on to the next page.
228 */
229 anon = pg->uanon;
230 uobj = pg->uobject;
231 if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
232 mutex_exit(&pg->interlock);
233 uvmpdpol_pageactivate_locked(pg);
234 PDPOL_EVCNT_INCR(reactexec);
235 continue;
236 }
237 if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
238 !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
239 mutex_exit(&pg->interlock);
240 uvmpdpol_pageactivate_locked(pg);
241 PDPOL_EVCNT_INCR(reactfile);
242 continue;
243 }
244 if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
245 mutex_exit(&pg->interlock);
246 uvmpdpol_pageactivate_locked(pg);
247 PDPOL_EVCNT_INCR(reactanon);
248 continue;
249 }
250
251 /*
252 * try to lock the object that owns the page.
253 *
254 * with the page interlock held, we can drop s->lock, which
255 * could otherwise serve as a barrier to us getting the
256 * object locked, because the owner of the object's lock may
257 * be blocked on s->lock (i.e. a deadlock).
258 *
259 * whatever happens, uvmpd_trylockowner() will release the
260 * interlock. with the interlock dropped we can then
261 * re-acquire our own lock. the order is:
262 *
263 * object -> pdpol -> interlock.
264 */
265 mutex_exit(&s->lock);
266 lock = uvmpd_trylockowner(pg);
267 /* pg->interlock now released */
268 mutex_enter(&s->lock);
269 if (lock == NULL) {
270 /* didn't get it - try the next page. */
271 continue;
272 }
273
274 /*
275 * move referenced pages back to active queue and skip to
276 * next page.
277 */
278 if (pmap_is_referenced(pg)) {
279 uvmpdpol_pageactivate_locked(pg);
280 uvmexp.pdreact++;
281 mutex_exit(lock);
282 continue;
283 }
284
285 /* we have a potential victim. */
286 *plock = lock;
287 break;
288 }
289 mutex_exit(&s->lock);
290 return pg;
291 }
292
293 void
294 uvmpdpol_balancequeue(int swap_shortage)
295 {
296 struct uvmpdpol_globalstate *s = &pdpol_state;
297 int inactive_shortage;
298 struct vm_page *p, *nextpg;
299 kmutex_t *lock;
300
301 /*
302 * we have done the scan to get free pages. now we work on meeting
303 * our inactive target.
304 */
305
306 mutex_enter(&s->lock);
307 inactive_shortage = pdpol_state.s_inactarg - pdpol_state.s_inactive;
308 for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
309 p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
310 p = nextpg) {
311 nextpg = TAILQ_NEXT(p, pageq.queue);
312
313 /*
314 * if there's a shortage of swap slots, try to free it.
315 */
316
317 if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0) {
318 mutex_enter(&p->interlock);
319 mutex_exit(&s->lock);
320 if (uvmpd_trydropswap(p)) {
321 swap_shortage--;
322 }
323 /* p->interlock now released */
324 mutex_enter(&s->lock);
325 }
326
327 /*
328 * if there's a shortage of inactive pages, deactivate.
329 */
330
331 if (inactive_shortage <= 0) {
332 continue;
333 }
334
335 /*
336 * acquire interlock to stablize page identity.
337 * if we have caught the page in a state of flux
338 * and it should be dequeued, do it now and then
339 * move on to the next.
340 */
341 mutex_enter(&p->interlock);
342 if ((p->uobject == NULL && p->uanon == NULL) ||
343 p->wire_count > 0) {
344 mutex_exit(&p->interlock);
345 uvmpdpol_pagedequeue_locked(p);
346 continue;
347 }
348 mutex_exit(&s->lock);
349 lock = uvmpd_trylockowner(p);
350 /* p->interlock now released */
351 mutex_enter(&s->lock);
352 if (lock != NULL) {
353 uvmpdpol_pagedeactivate_locked(p);
354 uvmexp.pddeact++;
355 inactive_shortage--;
356 mutex_exit(lock);
357 }
358 }
359 mutex_exit(&s->lock);
360 }
361
362 static void
363 uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
364 {
365
366 KASSERT(uvm_page_locked_p(pg));
367
368 if (pg->pqflags & PQ_ACTIVE) {
369 TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
370 pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
371 KASSERT(pdpol_state.s_active > 0);
372 pdpol_state.s_active--;
373 }
374 if ((pg->pqflags & PQ_INACTIVE) == 0) {
375 KASSERT(pg->wire_count == 0);
376 pmap_clear_reference(pg);
377 TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pageq.queue);
378 pg->pqflags |= PQ_INACTIVE;
379 pdpol_state.s_inactive++;
380 }
381 }
382
383 void
384 uvmpdpol_pagedeactivate(struct vm_page *pg)
385 {
386 struct uvmpdpol_globalstate *s = &pdpol_state;
387
388 mutex_enter(&s->lock);
389 uvmpdpol_pagedeactivate_locked(pg);
390 mutex_exit(&s->lock);
391 }
392
393 static void
394 uvmpdpol_pageactivate_locked(struct vm_page *pg)
395 {
396
397 uvmpdpol_pagedequeue_locked(pg);
398 TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pageq.queue);
399 pg->pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
400 pdpol_state.s_active++;
401 }
402
403 void
404 uvmpdpol_pageactivate(struct vm_page *pg)
405 {
406 struct uvmpdpol_globalstate *s = &pdpol_state;
407
408 /* Safety: PQ_ACTIVE clear also tells us if it is not enqueued. */
409 if ((pg->pqflags & PQ_ACTIVE) == 0 ||
410 ((hardclock_ticks & PQ_TIME) - (pg->pqflags & PQ_TIME)) >= hz) {
411 mutex_enter(&s->lock);
412 uvmpdpol_pageactivate_locked(pg);
413 mutex_exit(&s->lock);
414 }
415 }
416
417 static void
418 uvmpdpol_pagedequeue_locked(struct vm_page *pg)
419 {
420
421 if (pg->pqflags & PQ_ACTIVE) {
422 TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
423 pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
424 KASSERT(pdpol_state.s_active > 0);
425 pdpol_state.s_active--;
426 } else if (pg->pqflags & PQ_INACTIVE) {
427 TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq.queue);
428 pg->pqflags &= ~PQ_INACTIVE;
429 KASSERT(pdpol_state.s_inactive > 0);
430 pdpol_state.s_inactive--;
431 }
432 }
433
434 void
435 uvmpdpol_pagedequeue(struct vm_page *pg)
436 {
437 struct uvmpdpol_globalstate *s = &pdpol_state;
438
439 mutex_enter(&s->lock);
440 uvmpdpol_pagedequeue_locked(pg);
441 mutex_exit(&s->lock);
442 }
443
444 void
445 uvmpdpol_pageenqueue(struct vm_page *pg)
446 {
447 struct uvmpdpol_globalstate *s = &pdpol_state;
448
449 mutex_enter(&s->lock);
450 uvmpdpol_pageactivate_locked(pg);
451 mutex_exit(&s->lock);
452 }
453
/*
 * uvmpdpol_anfree: hook taking an anon; this policy has nothing to do
 * here.
 */
void
uvmpdpol_anfree(struct vm_anon *an)
{

	/* intentionally empty */
	(void)an;
}
458
459 bool
460 uvmpdpol_pageisqueued_p(struct vm_page *pg)
461 {
462
463 /* Safe to test unlocked due to page life-cycle. */
464 return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
465 }
466
467 void
468 uvmpdpol_estimatepageable(int *active, int *inactive)
469 {
470 struct uvmpdpol_globalstate *s = &pdpol_state;
471
472 mutex_enter(&s->lock);
473 if (active) {
474 *active = pdpol_state.s_active;
475 }
476 if (inactive) {
477 *inactive = pdpol_state.s_inactive;
478 }
479 mutex_exit(&s->lock);
480 }
481
482 #if !defined(PDSIM)
483 static int
484 min_check(struct uvm_pctparam *pct, int t)
485 {
486 struct uvmpdpol_globalstate *s = &pdpol_state;
487 int total = t;
488
489 if (pct != &s->s_anonmin) {
490 total += uvm_pctparam_get(&s->s_anonmin);
491 }
492 if (pct != &s->s_filemin) {
493 total += uvm_pctparam_get(&s->s_filemin);
494 }
495 if (pct != &s->s_execmin) {
496 total += uvm_pctparam_get(&s->s_execmin);
497 }
498 if (total > 95) {
499 return EINVAL;
500 }
501 return 0;
502 }
503 #endif /* !defined(PDSIM) */
504
505 void
506 uvmpdpol_init(void)
507 {
508 struct uvmpdpol_globalstate *s = &pdpol_state;
509
510 mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
511 TAILQ_INIT(&s->s_activeq);
512 TAILQ_INIT(&s->s_inactiveq);
513 uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
514 uvm_pctparam_init(&s->s_anonmin, 10, min_check);
515 uvm_pctparam_init(&s->s_filemin, 10, min_check);
516 uvm_pctparam_init(&s->s_execmin, 5, min_check);
517 uvm_pctparam_init(&s->s_anonmax, 80, NULL);
518 uvm_pctparam_init(&s->s_filemax, 50, NULL);
519 uvm_pctparam_init(&s->s_execmax, 30, NULL);
520 }
521
/*
 * uvmpdpol_reinit: re-initialization hook; this policy keeps no state
 * that needs it, so the function does nothing.
 */
void
uvmpdpol_reinit(void)
{

	/* intentionally empty */
	return;
}
526
527 bool
528 uvmpdpol_needsscan_p(void)
529 {
530
531 /* This must be an unlocked check: can be called from interrupt. */
532 return pdpol_state.s_inactive < pdpol_state.s_inactarg;
533 }
534
535 void
536 uvmpdpol_tune(void)
537 {
538 struct uvmpdpol_globalstate *s = &pdpol_state;
539
540 mutex_enter(&s->lock);
541 clock_tune();
542 mutex_exit(&s->lock);
543 }
544
545 #if !defined(PDSIM)
546
547 #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
548
549 void
550 uvmpdpol_sysctlsetup(void)
551 {
552 struct uvmpdpol_globalstate *s = &pdpol_state;
553
554 uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
555 SYSCTL_DESCR("Percentage of physical memory reserved "
556 "for anonymous application data"));
557 uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
558 SYSCTL_DESCR("Percentage of physical memory reserved "
559 "for cached file data"));
560 uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
561 SYSCTL_DESCR("Percentage of physical memory reserved "
562 "for cached executable data"));
563
564 uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
565 SYSCTL_DESCR("Percentage of physical memory which will "
566 "be reclaimed from other usage for "
567 "anonymous application data"));
568 uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
569 SYSCTL_DESCR("Percentage of physical memory which will "
570 "be reclaimed from other usage for cached "
571 "file data"));
572 uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
573 SYSCTL_DESCR("Percentage of physical memory which will "
574 "be reclaimed from other usage for cached "
575 "executable data"));
576
577 uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
578 SYSCTL_DESCR("Percentage of inactive queue of "
579 "the entire (active + inactive) queue"));
580 }
581
582 #endif /* !defined(PDSIM) */
583
#ifdef PDSIM
/*
 * pdsim_dump: state dump hook, built only when PDSIM is defined.  There
 * is nothing to dump yet; the DEBUG section is a placeholder.
 */
void
pdsim_dump(const char *id)
{
#ifdef DEBUG
	/* XXX */
#endif /* DEBUG */
	(void)id;
}
#endif /* PDSIM */
593