uvm_pdpolicy_clock.c revision 1.29 1 1.29 mlelstv /* $NetBSD: uvm_pdpolicy_clock.c,v 1.29 2020/01/01 01:18:34 mlelstv Exp $ */
2 1.2 yamt /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
3 1.2 yamt
4 1.28 ad /*-
5 1.28 ad * Copyright (c) 2019 The NetBSD Foundation, Inc.
6 1.28 ad * All rights reserved.
7 1.28 ad *
8 1.28 ad * This code is derived from software contributed to The NetBSD Foundation
9 1.28 ad * by Andrew Doran.
10 1.28 ad *
11 1.28 ad * Redistribution and use in source and binary forms, with or without
12 1.28 ad * modification, are permitted provided that the following conditions
13 1.28 ad * are met:
14 1.28 ad * 1. Redistributions of source code must retain the above copyright
15 1.28 ad * notice, this list of conditions and the following disclaimer.
16 1.28 ad * 2. Redistributions in binary form must reproduce the above copyright
17 1.28 ad * notice, this list of conditions and the following disclaimer in the
18 1.28 ad * documentation and/or other materials provided with the distribution.
19 1.28 ad *
20 1.28 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 1.28 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 1.28 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 1.28 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 1.28 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 1.28 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 1.28 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 1.28 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 1.28 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 1.28 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 1.28 ad * POSSIBILITY OF SUCH DAMAGE.
31 1.28 ad */
32 1.28 ad
33 1.2 yamt /*
34 1.2 yamt * Copyright (c) 1997 Charles D. Cranor and Washington University.
35 1.2 yamt * Copyright (c) 1991, 1993, The Regents of the University of California.
36 1.2 yamt *
37 1.2 yamt * All rights reserved.
38 1.2 yamt *
39 1.2 yamt * This code is derived from software contributed to Berkeley by
40 1.2 yamt * The Mach Operating System project at Carnegie-Mellon University.
41 1.2 yamt *
42 1.2 yamt * Redistribution and use in source and binary forms, with or without
43 1.2 yamt * modification, are permitted provided that the following conditions
44 1.2 yamt * are met:
45 1.2 yamt * 1. Redistributions of source code must retain the above copyright
46 1.2 yamt * notice, this list of conditions and the following disclaimer.
47 1.2 yamt * 2. Redistributions in binary form must reproduce the above copyright
48 1.2 yamt * notice, this list of conditions and the following disclaimer in the
49 1.2 yamt * documentation and/or other materials provided with the distribution.
50 1.13 chuck * 3. Neither the name of the University nor the names of its contributors
51 1.2 yamt * may be used to endorse or promote products derived from this software
52 1.2 yamt * without specific prior written permission.
53 1.2 yamt *
54 1.2 yamt * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 1.2 yamt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 1.2 yamt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 1.2 yamt * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 1.2 yamt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 1.2 yamt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 1.2 yamt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 1.2 yamt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 1.2 yamt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 1.2 yamt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 1.2 yamt * SUCH DAMAGE.
65 1.2 yamt *
66 1.2 yamt * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
67 1.2 yamt * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
68 1.2 yamt *
69 1.2 yamt *
70 1.2 yamt * Copyright (c) 1987, 1990 Carnegie-Mellon University.
71 1.2 yamt * All rights reserved.
72 1.2 yamt *
73 1.2 yamt * Permission to use, copy, modify and distribute this software and
74 1.2 yamt * its documentation is hereby granted, provided that both the copyright
75 1.2 yamt * notice and this permission notice appear in all copies of the
76 1.2 yamt * software, derivative works or modified versions, and any portions
77 1.2 yamt * thereof, and that both notices appear in supporting documentation.
78 1.2 yamt *
79 1.2 yamt * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
80 1.2 yamt * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
81 1.2 yamt * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
82 1.2 yamt *
83 1.2 yamt * Carnegie Mellon requests users of this software to return to
84 1.2 yamt *
85 1.2 yamt * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
86 1.2 yamt * School of Computer Science
87 1.2 yamt * Carnegie Mellon University
88 1.2 yamt * Pittsburgh PA 15213-3890
89 1.2 yamt *
90 1.2 yamt * any improvements or extensions that they make and grant Carnegie the
91 1.2 yamt * rights to redistribute these changes.
92 1.2 yamt */
93 1.2 yamt
94 1.2 yamt #if defined(PDSIM)
95 1.2 yamt
96 1.2 yamt #include "pdsim.h"
97 1.2 yamt
98 1.2 yamt #else /* defined(PDSIM) */
99 1.2 yamt
100 1.2 yamt #include <sys/cdefs.h>
101 1.29 mlelstv __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.29 2020/01/01 01:18:34 mlelstv Exp $");
102 1.2 yamt
103 1.2 yamt #include <sys/param.h>
104 1.2 yamt #include <sys/proc.h>
105 1.2 yamt #include <sys/systm.h>
106 1.2 yamt #include <sys/kernel.h>
107 1.28 ad #include <sys/kmem.h>
108 1.29 mlelstv #include <sys/atomic.h>
109 1.2 yamt
110 1.2 yamt #include <uvm/uvm.h>
111 1.2 yamt #include <uvm/uvm_pdpolicy.h>
112 1.2 yamt #include <uvm/uvm_pdpolicy_impl.h>
113 1.18 ad #include <uvm/uvm_stat.h>
114 1.2 yamt
115 1.2 yamt #endif /* defined(PDSIM) */
116 1.2 yamt
/*
 * Number of entries in each per-CPU queue of pending page status changes.
 * The default of 128 entries makes for a 1kB queue on _LP64 and has been
 * found to be a reasonable compromise: it keeps lock contention events and
 * wait times low, without using too much memory or letting the global
 * state fall too far behind.  Can be overridden at build time.
 */
#if !defined(CLOCK_PDQ_SIZE)
#define	CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */

/* Bits of pg->pqflags that are private to this policy. */
#define	PQ_TIME		0xffffffc0	/* time of last activation */
#define	PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000020	/* page is in active list */

/*
 * Initial value of the "inactivepct" tunable.  Can be overridden at build
 * time.
 */
#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */
134 1.2 yamt
135 1.2 yamt struct uvmpdpol_globalstate {
136 1.18 ad kmutex_t lock; /* lock on state */
137 1.18 ad /* <= compiler pads here */
138 1.18 ad struct pglist s_activeq /* allocated pages, in use */
139 1.18 ad __aligned(COHERENCY_UNIT);
140 1.2 yamt struct pglist s_inactiveq; /* pages between the clock hands */
141 1.2 yamt int s_active;
142 1.2 yamt int s_inactive;
143 1.2 yamt int s_inactarg;
144 1.2 yamt struct uvm_pctparam s_anonmin;
145 1.2 yamt struct uvm_pctparam s_filemin;
146 1.2 yamt struct uvm_pctparam s_execmin;
147 1.2 yamt struct uvm_pctparam s_anonmax;
148 1.2 yamt struct uvm_pctparam s_filemax;
149 1.2 yamt struct uvm_pctparam s_execmax;
150 1.2 yamt struct uvm_pctparam s_inactivepct;
151 1.2 yamt };
152 1.2 yamt
153 1.2 yamt struct uvmpdpol_scanstate {
154 1.7 thorpej bool ss_anonreact, ss_filereact, ss_execreact;
155 1.24 ad struct vm_page ss_marker;
156 1.2 yamt };
157 1.2 yamt
158 1.18 ad static void uvmpdpol_pageactivate_locked(struct vm_page *);
159 1.18 ad static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
160 1.18 ad static void uvmpdpol_pagedequeue_locked(struct vm_page *);
161 1.28 ad static bool uvmpdpol_pagerealize_locked(struct vm_page *);
162 1.28 ad static struct uvm_cpu *uvmpdpol_flush(void);
163 1.18 ad
164 1.18 ad static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
165 1.2 yamt static struct uvmpdpol_scanstate pdpol_scanstate;
166 1.2 yamt
167 1.2 yamt PDPOL_EVCNT_DEFINE(reactexec)
168 1.2 yamt PDPOL_EVCNT_DEFINE(reactfile)
169 1.2 yamt PDPOL_EVCNT_DEFINE(reactanon)
170 1.2 yamt
171 1.2 yamt static void
172 1.2 yamt clock_tune(void)
173 1.2 yamt {
174 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
175 1.2 yamt
176 1.2 yamt s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
177 1.2 yamt s->s_active + s->s_inactive);
178 1.2 yamt if (s->s_inactarg <= uvmexp.freetarg) {
179 1.2 yamt s->s_inactarg = uvmexp.freetarg + 1;
180 1.2 yamt }
181 1.2 yamt }
182 1.2 yamt
183 1.2 yamt void
184 1.2 yamt uvmpdpol_scaninit(void)
185 1.2 yamt {
186 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
187 1.2 yamt struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
188 1.2 yamt int t;
189 1.7 thorpej bool anonunder, fileunder, execunder;
190 1.7 thorpej bool anonover, fileover, execover;
191 1.7 thorpej bool anonreact, filereact, execreact;
192 1.20 ad int64_t freepg, anonpg, filepg, execpg;
193 1.2 yamt
194 1.2 yamt /*
195 1.2 yamt * decide which types of pages we want to reactivate instead of freeing
196 1.2 yamt * to keep usage within the minimum and maximum usage limits.
197 1.2 yamt */
198 1.2 yamt
199 1.20 ad cpu_count_sync_all();
200 1.27 ad freepg = uvm_availmem();
201 1.20 ad anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
202 1.20 ad filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
203 1.20 ad execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
204 1.20 ad
205 1.18 ad mutex_enter(&s->lock);
206 1.20 ad t = s->s_active + s->s_inactive + freepg;
207 1.20 ad anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
208 1.20 ad fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
209 1.20 ad execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
210 1.20 ad anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
211 1.20 ad fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
212 1.20 ad execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
213 1.2 yamt anonreact = anonunder || (!anonover && (fileover || execover));
214 1.2 yamt filereact = fileunder || (!fileover && (anonover || execover));
215 1.2 yamt execreact = execunder || (!execover && (anonover || fileover));
216 1.2 yamt if (filereact && execreact && (anonreact || uvm_swapisfull())) {
217 1.8 thorpej anonreact = filereact = execreact = false;
218 1.2 yamt }
219 1.2 yamt ss->ss_anonreact = anonreact;
220 1.2 yamt ss->ss_filereact = filereact;
221 1.2 yamt ss->ss_execreact = execreact;
222 1.24 ad memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
223 1.24 ad ss->ss_marker.flags = PG_MARKER;
224 1.24 ad TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
225 1.24 ad mutex_exit(&s->lock);
226 1.24 ad }
227 1.24 ad
228 1.24 ad void
229 1.24 ad uvmpdpol_scanfini(void)
230 1.24 ad {
231 1.24 ad struct uvmpdpol_globalstate *s = &pdpol_state;
232 1.24 ad struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
233 1.2 yamt
234 1.24 ad mutex_enter(&s->lock);
235 1.24 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
236 1.18 ad mutex_exit(&s->lock);
237 1.2 yamt }
238 1.2 yamt
239 1.2 yamt struct vm_page *
240 1.18 ad uvmpdpol_selectvictim(kmutex_t **plock)
241 1.2 yamt {
242 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
243 1.2 yamt struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
244 1.2 yamt struct vm_page *pg;
245 1.14 rmind kmutex_t *lock;
246 1.2 yamt
247 1.18 ad mutex_enter(&s->lock);
248 1.2 yamt while (/* CONSTCOND */ 1) {
249 1.2 yamt struct vm_anon *anon;
250 1.2 yamt struct uvm_object *uobj;
251 1.2 yamt
252 1.24 ad pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
253 1.2 yamt if (pg == NULL) {
254 1.2 yamt break;
255 1.2 yamt }
256 1.24 ad KASSERT((pg->flags & PG_MARKER) == 0);
257 1.2 yamt uvmexp.pdscans++;
258 1.2 yamt
259 1.2 yamt /*
260 1.18 ad * acquire interlock to stablize page identity.
261 1.18 ad * if we have caught the page in a state of flux
262 1.28 ad * deal with it and retry.
263 1.2 yamt */
264 1.18 ad mutex_enter(&pg->interlock);
265 1.28 ad if (uvmpdpol_pagerealize_locked(pg)) {
266 1.28 ad mutex_exit(&pg->interlock);
267 1.28 ad continue;
268 1.2 yamt }
269 1.2 yamt
270 1.2 yamt /*
271 1.24 ad * now prepare to move on to the next page.
272 1.24 ad */
273 1.24 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
274 1.24 ad pdqueue);
275 1.24 ad TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
276 1.24 ad &ss->ss_marker, pdqueue);
277 1.24 ad
278 1.24 ad /*
279 1.2 yamt * enforce the minimum thresholds on different
280 1.2 yamt * types of memory usage. if reusing the current
281 1.2 yamt * page would reduce that type of usage below its
282 1.2 yamt * minimum, reactivate the page instead and move
283 1.2 yamt * on to the next page.
284 1.2 yamt */
285 1.18 ad anon = pg->uanon;
286 1.18 ad uobj = pg->uobject;
287 1.2 yamt if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
288 1.28 ad uvmpdpol_pageactivate_locked(pg);
289 1.18 ad mutex_exit(&pg->interlock);
290 1.2 yamt PDPOL_EVCNT_INCR(reactexec);
291 1.2 yamt continue;
292 1.2 yamt }
293 1.2 yamt if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
294 1.2 yamt !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
295 1.28 ad uvmpdpol_pageactivate_locked(pg);
296 1.18 ad mutex_exit(&pg->interlock);
297 1.2 yamt PDPOL_EVCNT_INCR(reactfile);
298 1.2 yamt continue;
299 1.2 yamt }
300 1.2 yamt if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
301 1.28 ad uvmpdpol_pageactivate_locked(pg);
302 1.18 ad mutex_exit(&pg->interlock);
303 1.2 yamt PDPOL_EVCNT_INCR(reactanon);
304 1.2 yamt continue;
305 1.2 yamt }
306 1.2 yamt
307 1.18 ad /*
308 1.18 ad * try to lock the object that owns the page.
309 1.18 ad *
310 1.18 ad * with the page interlock held, we can drop s->lock, which
311 1.18 ad * could otherwise serve as a barrier to us getting the
312 1.18 ad * object locked, because the owner of the object's lock may
313 1.18 ad * be blocked on s->lock (i.e. a deadlock).
314 1.18 ad *
315 1.18 ad * whatever happens, uvmpd_trylockowner() will release the
316 1.18 ad * interlock. with the interlock dropped we can then
317 1.18 ad * re-acquire our own lock. the order is:
318 1.18 ad *
319 1.18 ad * object -> pdpol -> interlock.
320 1.18 ad */
321 1.18 ad mutex_exit(&s->lock);
322 1.18 ad lock = uvmpd_trylockowner(pg);
323 1.18 ad /* pg->interlock now released */
324 1.18 ad mutex_enter(&s->lock);
325 1.18 ad if (lock == NULL) {
326 1.18 ad /* didn't get it - try the next page. */
327 1.18 ad continue;
328 1.18 ad }
329 1.18 ad
330 1.18 ad /*
331 1.18 ad * move referenced pages back to active queue and skip to
332 1.18 ad * next page.
333 1.18 ad */
334 1.18 ad if (pmap_is_referenced(pg)) {
335 1.28 ad mutex_enter(&pg->interlock);
336 1.18 ad uvmpdpol_pageactivate_locked(pg);
337 1.28 ad mutex_exit(&pg->interlock);
338 1.18 ad uvmexp.pdreact++;
339 1.18 ad mutex_exit(lock);
340 1.18 ad continue;
341 1.18 ad }
342 1.18 ad
343 1.18 ad /* we have a potential victim. */
344 1.18 ad *plock = lock;
345 1.2 yamt break;
346 1.2 yamt }
347 1.18 ad mutex_exit(&s->lock);
348 1.2 yamt return pg;
349 1.2 yamt }
350 1.2 yamt
351 1.2 yamt void
352 1.2 yamt uvmpdpol_balancequeue(int swap_shortage)
353 1.2 yamt {
354 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
355 1.2 yamt int inactive_shortage;
356 1.24 ad struct vm_page *p, marker;
357 1.14 rmind kmutex_t *lock;
358 1.2 yamt
359 1.2 yamt /*
360 1.2 yamt * we have done the scan to get free pages. now we work on meeting
361 1.2 yamt * our inactive target.
362 1.2 yamt */
363 1.2 yamt
364 1.24 ad memset(&marker, 0, sizeof(marker));
365 1.24 ad marker.flags = PG_MARKER;
366 1.24 ad
367 1.18 ad mutex_enter(&s->lock);
368 1.24 ad TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
369 1.24 ad for (;;) {
370 1.24 ad inactive_shortage =
371 1.24 ad pdpol_state.s_inactarg - pdpol_state.s_inactive;
372 1.24 ad if (inactive_shortage <= 0 && swap_shortage <= 0) {
373 1.24 ad break;
374 1.2 yamt }
375 1.24 ad p = TAILQ_NEXT(&marker, pdqueue);
376 1.24 ad if (p == NULL) {
377 1.24 ad break;
378 1.14 rmind }
379 1.24 ad KASSERT((p->flags & PG_MARKER) == 0);
380 1.14 rmind
381 1.18 ad /*
382 1.18 ad * acquire interlock to stablize page identity.
383 1.18 ad * if we have caught the page in a state of flux
384 1.28 ad * deal with it and retry.
385 1.18 ad */
386 1.18 ad mutex_enter(&p->interlock);
387 1.28 ad if (uvmpdpol_pagerealize_locked(p)) {
388 1.28 ad mutex_exit(&p->interlock);
389 1.28 ad continue;
390 1.18 ad }
391 1.24 ad
392 1.24 ad /*
393 1.24 ad * now prepare to move on to the next page.
394 1.24 ad */
395 1.24 ad TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
396 1.24 ad TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
397 1.24 ad pdqueue);
398 1.24 ad
399 1.24 ad /*
400 1.24 ad * try to lock the object that owns the page. see comments
401 1.24 ad * in uvmpdol_selectvictim().
402 1.24 ad */
403 1.24 ad mutex_exit(&s->lock);
404 1.24 ad lock = uvmpd_trylockowner(p);
405 1.24 ad /* p->interlock now released */
406 1.24 ad mutex_enter(&s->lock);
407 1.24 ad if (lock == NULL) {
408 1.24 ad /* didn't get it - try the next page. */
409 1.24 ad continue;
410 1.24 ad }
411 1.24 ad
412 1.24 ad /*
413 1.24 ad * if there's a shortage of swap slots, try to free it.
414 1.24 ad */
415 1.24 ad if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
416 1.24 ad (p->flags & PG_BUSY) == 0) {
417 1.24 ad if (uvmpd_dropswap(p)) {
418 1.24 ad swap_shortage--;
419 1.24 ad }
420 1.24 ad }
421 1.24 ad
422 1.24 ad /*
423 1.24 ad * if there's a shortage of inactive pages, deactivate.
424 1.24 ad */
425 1.24 ad if (inactive_shortage > 0) {
426 1.28 ad pmap_clear_reference(p);
427 1.28 ad mutex_enter(&p->interlock);
428 1.18 ad uvmpdpol_pagedeactivate_locked(p);
429 1.28 ad mutex_exit(&p->interlock);
430 1.2 yamt uvmexp.pddeact++;
431 1.2 yamt inactive_shortage--;
432 1.2 yamt }
433 1.24 ad mutex_exit(lock);
434 1.2 yamt }
435 1.24 ad TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
436 1.18 ad mutex_exit(&s->lock);
437 1.2 yamt }
438 1.2 yamt
439 1.18 ad static void
440 1.18 ad uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
441 1.2 yamt {
442 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
443 1.2 yamt
444 1.28 ad KASSERT(mutex_owned(&s->lock));
445 1.28 ad KASSERT(mutex_owned(&pg->interlock));
446 1.28 ad KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
447 1.28 ad (PQ_INTENT_D | PQ_INTENT_SET));
448 1.14 rmind
449 1.2 yamt if (pg->pqflags & PQ_ACTIVE) {
450 1.23 ad TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
451 1.2 yamt KASSERT(pdpol_state.s_active > 0);
452 1.2 yamt pdpol_state.s_active--;
453 1.2 yamt }
454 1.2 yamt if ((pg->pqflags & PQ_INACTIVE) == 0) {
455 1.2 yamt KASSERT(pg->wire_count == 0);
456 1.23 ad TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
457 1.2 yamt pdpol_state.s_inactive++;
458 1.2 yamt }
459 1.28 ad pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_INACTIVE;
460 1.2 yamt }
461 1.2 yamt
462 1.2 yamt void
463 1.18 ad uvmpdpol_pagedeactivate(struct vm_page *pg)
464 1.18 ad {
465 1.18 ad
466 1.28 ad KASSERT(uvm_page_owner_locked_p(pg));
467 1.28 ad KASSERT(mutex_owned(&pg->interlock));
468 1.28 ad
469 1.28 ad /*
470 1.28 ad * we have to clear the reference bit now, as when it comes time to
471 1.28 ad * realize the intent we won't have the object locked any more.
472 1.28 ad */
473 1.28 ad pmap_clear_reference(pg);
474 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_I);
475 1.18 ad }
476 1.18 ad
477 1.18 ad static void
478 1.18 ad uvmpdpol_pageactivate_locked(struct vm_page *pg)
479 1.2 yamt {
480 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
481 1.28 ad
482 1.28 ad KASSERT(mutex_owned(&s->lock));
483 1.28 ad KASSERT(mutex_owned(&pg->interlock));
484 1.28 ad KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
485 1.28 ad (PQ_INTENT_D | PQ_INTENT_SET));
486 1.2 yamt
487 1.18 ad uvmpdpol_pagedequeue_locked(pg);
488 1.23 ad TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
489 1.2 yamt pdpol_state.s_active++;
490 1.28 ad pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_ACTIVE |
491 1.28 ad (hardclock_ticks & PQ_TIME);
492 1.2 yamt }
493 1.2 yamt
494 1.2 yamt void
495 1.18 ad uvmpdpol_pageactivate(struct vm_page *pg)
496 1.18 ad {
497 1.28 ad uint32_t pqflags;
498 1.28 ad
499 1.28 ad KASSERT(uvm_page_owner_locked_p(pg));
500 1.28 ad KASSERT(mutex_owned(&pg->interlock));
501 1.18 ad
502 1.28 ad /*
503 1.28 ad * if there is any intent set on the page, or the page is not
504 1.28 ad * active, or the page was activated in the "distant" past, then
505 1.28 ad * it needs to be activated anew.
506 1.28 ad */
507 1.28 ad pqflags = pg->pqflags;
508 1.28 ad if ((pqflags & PQ_INTENT_SET) != 0 ||
509 1.28 ad (pqflags & PQ_ACTIVE) == 0 ||
510 1.28 ad ((hardclock_ticks & PQ_TIME) - (pqflags & PQ_TIME)) > hz) {
511 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_A);
512 1.18 ad }
513 1.18 ad }
514 1.18 ad
515 1.18 ad static void
516 1.18 ad uvmpdpol_pagedequeue_locked(struct vm_page *pg)
517 1.2 yamt {
518 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
519 1.28 ad
520 1.28 ad KASSERT(mutex_owned(&s->lock));
521 1.28 ad KASSERT(mutex_owned(&pg->interlock));
522 1.2 yamt
523 1.2 yamt if (pg->pqflags & PQ_ACTIVE) {
524 1.23 ad TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
525 1.28 ad KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
526 1.2 yamt KASSERT(pdpol_state.s_active > 0);
527 1.2 yamt pdpol_state.s_active--;
528 1.2 yamt } else if (pg->pqflags & PQ_INACTIVE) {
529 1.23 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
530 1.2 yamt KASSERT(pdpol_state.s_inactive > 0);
531 1.2 yamt pdpol_state.s_inactive--;
532 1.2 yamt }
533 1.28 ad pg->pqflags &= PQ_INTENT_QUEUED;
534 1.2 yamt }
535 1.2 yamt
536 1.2 yamt void
537 1.18 ad uvmpdpol_pagedequeue(struct vm_page *pg)
538 1.18 ad {
539 1.18 ad
540 1.28 ad KASSERT(uvm_page_owner_locked_p(pg));
541 1.28 ad KASSERT(mutex_owned(&pg->interlock));
542 1.28 ad
543 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_D);
544 1.18 ad }
545 1.18 ad
546 1.18 ad void
547 1.2 yamt uvmpdpol_pageenqueue(struct vm_page *pg)
548 1.2 yamt {
549 1.2 yamt
550 1.28 ad KASSERT(uvm_page_owner_locked_p(pg));
551 1.28 ad KASSERT(mutex_owned(&pg->interlock));
552 1.28 ad
553 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_E);
554 1.2 yamt }
555 1.2 yamt
/*
 * uvmpdpol_anfree: policy hook taking an anon.  This policy does nothing
 * here.
 */
void
uvmpdpol_anfree(struct vm_anon *an)
{

	/* nothing to do */
}
560 1.2 yamt
561 1.7 thorpej bool
562 1.2 yamt uvmpdpol_pageisqueued_p(struct vm_page *pg)
563 1.2 yamt {
564 1.28 ad uint32_t pqflags;
565 1.2 yamt
566 1.28 ad /*
567 1.28 ad * if there's an intent set, we have to consider it. otherwise,
568 1.28 ad * return the actual state. we may be called unlocked for the
569 1.28 ad * purpose of assertions, which is safe due to the page lifecycle.
570 1.28 ad */
571 1.28 ad pqflags = atomic_load_relaxed(&pg->pqflags);
572 1.28 ad if ((pqflags & PQ_INTENT_SET) != 0) {
573 1.28 ad return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
574 1.28 ad } else {
575 1.28 ad return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
576 1.28 ad }
577 1.2 yamt }
578 1.2 yamt
579 1.2 yamt void
580 1.2 yamt uvmpdpol_estimatepageable(int *active, int *inactive)
581 1.2 yamt {
582 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
583 1.2 yamt
584 1.18 ad mutex_enter(&s->lock);
585 1.2 yamt if (active) {
586 1.2 yamt *active = pdpol_state.s_active;
587 1.2 yamt }
588 1.2 yamt if (inactive) {
589 1.2 yamt *inactive = pdpol_state.s_inactive;
590 1.2 yamt }
591 1.18 ad mutex_exit(&s->lock);
592 1.2 yamt }
593 1.2 yamt
594 1.2 yamt #if !defined(PDSIM)
595 1.2 yamt static int
596 1.2 yamt min_check(struct uvm_pctparam *pct, int t)
597 1.2 yamt {
598 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
599 1.2 yamt int total = t;
600 1.2 yamt
601 1.2 yamt if (pct != &s->s_anonmin) {
602 1.2 yamt total += uvm_pctparam_get(&s->s_anonmin);
603 1.2 yamt }
604 1.2 yamt if (pct != &s->s_filemin) {
605 1.2 yamt total += uvm_pctparam_get(&s->s_filemin);
606 1.2 yamt }
607 1.2 yamt if (pct != &s->s_execmin) {
608 1.2 yamt total += uvm_pctparam_get(&s->s_execmin);
609 1.2 yamt }
610 1.2 yamt if (total > 95) {
611 1.2 yamt return EINVAL;
612 1.2 yamt }
613 1.2 yamt return 0;
614 1.2 yamt }
615 1.2 yamt #endif /* !defined(PDSIM) */
616 1.2 yamt
617 1.2 yamt void
618 1.2 yamt uvmpdpol_init(void)
619 1.2 yamt {
620 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
621 1.2 yamt
622 1.18 ad mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
623 1.2 yamt TAILQ_INIT(&s->s_activeq);
624 1.2 yamt TAILQ_INIT(&s->s_inactiveq);
625 1.2 yamt uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
626 1.2 yamt uvm_pctparam_init(&s->s_anonmin, 10, min_check);
627 1.2 yamt uvm_pctparam_init(&s->s_filemin, 10, min_check);
628 1.2 yamt uvm_pctparam_init(&s->s_execmin, 5, min_check);
629 1.2 yamt uvm_pctparam_init(&s->s_anonmax, 80, NULL);
630 1.2 yamt uvm_pctparam_init(&s->s_filemax, 50, NULL);
631 1.2 yamt uvm_pctparam_init(&s->s_execmax, 30, NULL);
632 1.2 yamt }
633 1.2 yamt
634 1.2 yamt void
635 1.28 ad uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
636 1.28 ad {
637 1.28 ad
638 1.28 ad ucpu->pdq =
639 1.28 ad kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
640 1.28 ad ucpu->pdqhead = CLOCK_PDQ_SIZE;
641 1.28 ad ucpu->pdqtail = CLOCK_PDQ_SIZE;
642 1.28 ad }
643 1.28 ad
/*
 * uvmpdpol_reinit: policy re-initialization hook.  This policy does
 * nothing here.
 */
void
uvmpdpol_reinit(void)
{

	/* nothing to do */
}
648 1.2 yamt
649 1.7 thorpej bool
650 1.2 yamt uvmpdpol_needsscan_p(void)
651 1.2 yamt {
652 1.2 yamt
653 1.28 ad /*
654 1.28 ad * this must be an unlocked check: can be called from interrupt.
655 1.28 ad */
656 1.17 para return pdpol_state.s_inactive < pdpol_state.s_inactarg;
657 1.2 yamt }
658 1.2 yamt
659 1.2 yamt void
660 1.2 yamt uvmpdpol_tune(void)
661 1.2 yamt {
662 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
663 1.2 yamt
664 1.18 ad mutex_enter(&s->lock);
665 1.2 yamt clock_tune();
666 1.18 ad mutex_exit(&s->lock);
667 1.2 yamt }
668 1.2 yamt
669 1.28 ad /*
670 1.28 ad * uvmpdpol_pagerealize_locked: take the intended state set on an indivdual
671 1.28 ad * page and make it real. return true if any work was done.
672 1.28 ad */
673 1.28 ad static bool
674 1.28 ad uvmpdpol_pagerealize_locked(struct vm_page *pg)
675 1.28 ad {
676 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
677 1.28 ad
678 1.28 ad KASSERT(mutex_owned(&s->lock));
679 1.28 ad KASSERT(mutex_owned(&pg->interlock));
680 1.28 ad
681 1.28 ad switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
682 1.28 ad case PQ_INTENT_A | PQ_INTENT_SET:
683 1.28 ad case PQ_INTENT_E | PQ_INTENT_SET:
684 1.28 ad uvmpdpol_pageactivate_locked(pg);
685 1.28 ad return true;
686 1.28 ad case PQ_INTENT_I | PQ_INTENT_SET:
687 1.28 ad uvmpdpol_pagedeactivate_locked(pg);
688 1.28 ad return true;
689 1.28 ad case PQ_INTENT_D | PQ_INTENT_SET:
690 1.28 ad uvmpdpol_pagedequeue_locked(pg);
691 1.28 ad return true;
692 1.28 ad default:
693 1.28 ad return false;
694 1.28 ad }
695 1.28 ad }
696 1.28 ad
697 1.28 ad /*
698 1.28 ad * uvmpdpol_flush: return the current uvm_cpu with all of its pending
699 1.28 ad * updates flushed to the global queues. this routine may block, and
700 1.28 ad * so can switch cpu. the idea is to empty to queue on whatever cpu
701 1.28 ad * we finally end up on.
702 1.28 ad */
703 1.28 ad static struct uvm_cpu *
704 1.28 ad uvmpdpol_flush(void)
705 1.28 ad {
706 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
707 1.28 ad struct uvm_cpu *ucpu;
708 1.28 ad struct vm_page *pg;
709 1.28 ad
710 1.28 ad KASSERT(kpreempt_disabled());
711 1.28 ad
712 1.28 ad mutex_enter(&s->lock);
713 1.28 ad for (;;) {
714 1.28 ad /*
715 1.28 ad * prefer scanning forwards (even though mutex_enter() is
716 1.28 ad * serializing) so as to not defeat any prefetch logic in
717 1.28 ad * the CPU. that means elsewhere enqueuing backwards, like
718 1.28 ad * a stack, but not so important there as pages are being
719 1.28 ad * added singularly.
720 1.28 ad *
721 1.28 ad * prefetch the next "struct vm_page" while working on the
722 1.28 ad * current one. this has a measurable and very positive
723 1.28 ad * effect in reducing the amount of time spent here under
724 1.28 ad * the global lock.
725 1.28 ad */
726 1.28 ad ucpu = curcpu()->ci_data.cpu_uvm;
727 1.28 ad KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
728 1.28 ad if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
729 1.28 ad break;
730 1.28 ad }
731 1.28 ad pg = ucpu->pdq[ucpu->pdqhead++];
732 1.28 ad if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
733 1.28 ad __builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
734 1.28 ad }
735 1.28 ad mutex_enter(&pg->interlock);
736 1.28 ad pg->pqflags &= ~PQ_INTENT_QUEUED;
737 1.28 ad (void)uvmpdpol_pagerealize_locked(pg);
738 1.28 ad mutex_exit(&pg->interlock);
739 1.28 ad }
740 1.28 ad mutex_exit(&s->lock);
741 1.28 ad return ucpu;
742 1.28 ad }
743 1.28 ad
744 1.28 ad /*
745 1.28 ad * uvmpdpol_pagerealize: realize any intent set on the page. in this
746 1.28 ad * implementation, that means putting the page on a per-CPU queue to be
747 1.28 ad * dealt with later.
748 1.28 ad */
749 1.28 ad void
750 1.28 ad uvmpdpol_pagerealize(struct vm_page *pg)
751 1.28 ad {
752 1.28 ad struct uvm_cpu *ucpu;
753 1.28 ad
754 1.28 ad /*
755 1.28 ad * drain the per per-CPU queue if full, then enter the page.
756 1.28 ad */
757 1.28 ad kpreempt_disable();
758 1.28 ad ucpu = curcpu()->ci_data.cpu_uvm;
759 1.28 ad if (__predict_false(ucpu->pdqhead == 0)) {
760 1.28 ad ucpu = uvmpdpol_flush();
761 1.28 ad }
762 1.28 ad ucpu->pdq[--(ucpu->pdqhead)] = pg;
763 1.28 ad kpreempt_enable();
764 1.28 ad }
765 1.28 ad
766 1.28 ad /*
767 1.28 ad * uvmpdpol_idle: called from the system idle loop. periodically purge any
768 1.28 ad * pending updates back to the global queues.
769 1.28 ad */
770 1.28 ad void
771 1.28 ad uvmpdpol_idle(struct uvm_cpu *ucpu)
772 1.28 ad {
773 1.28 ad struct uvmpdpol_globalstate *s = &pdpol_state;
774 1.28 ad struct vm_page *pg;
775 1.28 ad
776 1.28 ad KASSERT(kpreempt_disabled());
777 1.28 ad
778 1.28 ad /*
779 1.28 ad * if no pages in the queue, we have nothing to do.
780 1.28 ad */
781 1.28 ad if (ucpu->pdqhead == ucpu->pdqtail) {
782 1.28 ad ucpu->pdqtime = hardclock_ticks;
783 1.28 ad return;
784 1.28 ad }
785 1.28 ad
786 1.28 ad /*
787 1.28 ad * don't do this more than ~8 times a second as it would needlessly
788 1.28 ad * exert pressure.
789 1.28 ad */
790 1.28 ad if (hardclock_ticks - ucpu->pdqtime < (hz >> 3)) {
791 1.28 ad return;
792 1.28 ad }
793 1.28 ad
794 1.28 ad /*
795 1.28 ad * the idle LWP can't block, so we have to try for the lock. if we
796 1.28 ad * get it, purge the per-CPU pending update queue. continually
797 1.28 ad * check for a pending resched: in that case exit immediately.
798 1.28 ad */
799 1.28 ad if (mutex_tryenter(&s->lock)) {
800 1.28 ad while (ucpu->pdqhead != ucpu->pdqtail) {
801 1.28 ad pg = ucpu->pdq[ucpu->pdqhead];
802 1.28 ad if (!mutex_tryenter(&pg->interlock)) {
803 1.28 ad break;
804 1.28 ad }
805 1.28 ad ucpu->pdqhead++;
806 1.28 ad pg->pqflags &= ~PQ_INTENT_QUEUED;
807 1.28 ad (void)uvmpdpol_pagerealize_locked(pg);
808 1.28 ad mutex_exit(&pg->interlock);
809 1.28 ad if (curcpu()->ci_want_resched) {
810 1.28 ad break;
811 1.28 ad }
812 1.28 ad }
813 1.28 ad if (ucpu->pdqhead == ucpu->pdqtail) {
814 1.28 ad ucpu->pdqtime = hardclock_ticks;
815 1.28 ad }
816 1.28 ad mutex_exit(&s->lock);
817 1.28 ad }
818 1.28 ad }
819 1.28 ad
820 1.2 yamt #if !defined(PDSIM)
821 1.2 yamt
822 1.2 yamt #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
823 1.2 yamt
824 1.2 yamt void
825 1.2 yamt uvmpdpol_sysctlsetup(void)
826 1.2 yamt {
827 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
828 1.2 yamt
829 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
830 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
831 1.2 yamt "for anonymous application data"));
832 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
833 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
834 1.11 martin "for cached file data"));
835 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
836 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
837 1.11 martin "for cached executable data"));
838 1.2 yamt
839 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
840 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
841 1.2 yamt "be reclaimed from other usage for "
842 1.2 yamt "anonymous application data"));
843 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
844 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
845 1.2 yamt "be reclaimed from other usage for cached "
846 1.2 yamt "file data"));
847 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
848 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
849 1.2 yamt "be reclaimed from other usage for cached "
850 1.2 yamt "executable data"));
851 1.2 yamt
852 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
853 1.2 yamt SYSCTL_DESCR("Percentage of inactive queue of "
854 1.2 yamt "the entire (active + inactive) queue"));
855 1.2 yamt }
856 1.2 yamt
857 1.2 yamt #endif /* !defined(PDSIM) */
858 1.2 yamt
#if defined(PDSIM)
/*
 * pdsim_dump: dump hook for PDSIM builds.  Currently prints nothing; the
 * DEBUG section is an unfilled placeholder.
 */
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */
868