uvm_pdpolicy_clock.c revision 1.36 1 1.36 maxv /* $NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $ */
2 1.2 yamt /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
3 1.2 yamt
4 1.28 ad /*-
5 1.34 ad * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
6 1.28 ad * All rights reserved.
7 1.28 ad *
8 1.28 ad * This code is derived from software contributed to The NetBSD Foundation
9 1.28 ad * by Andrew Doran.
10 1.28 ad *
11 1.28 ad * Redistribution and use in source and binary forms, with or without
12 1.28 ad * modification, are permitted provided that the following conditions
13 1.28 ad * are met:
14 1.28 ad * 1. Redistributions of source code must retain the above copyright
15 1.28 ad * notice, this list of conditions and the following disclaimer.
16 1.28 ad * 2. Redistributions in binary form must reproduce the above copyright
17 1.28 ad * notice, this list of conditions and the following disclaimer in the
18 1.28 ad * documentation and/or other materials provided with the distribution.
19 1.28 ad *
20 1.28 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 1.28 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 1.28 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 1.28 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 1.28 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 1.28 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 1.28 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 1.28 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 1.28 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 1.28 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 1.28 ad * POSSIBILITY OF SUCH DAMAGE.
31 1.28 ad */
32 1.28 ad
33 1.2 yamt /*
34 1.2 yamt * Copyright (c) 1997 Charles D. Cranor and Washington University.
35 1.2 yamt * Copyright (c) 1991, 1993, The Regents of the University of California.
36 1.2 yamt *
37 1.2 yamt * All rights reserved.
38 1.2 yamt *
39 1.2 yamt * This code is derived from software contributed to Berkeley by
40 1.2 yamt * The Mach Operating System project at Carnegie-Mellon University.
41 1.2 yamt *
42 1.2 yamt * Redistribution and use in source and binary forms, with or without
43 1.2 yamt * modification, are permitted provided that the following conditions
44 1.2 yamt * are met:
45 1.2 yamt * 1. Redistributions of source code must retain the above copyright
46 1.2 yamt * notice, this list of conditions and the following disclaimer.
47 1.2 yamt * 2. Redistributions in binary form must reproduce the above copyright
48 1.2 yamt * notice, this list of conditions and the following disclaimer in the
49 1.2 yamt * documentation and/or other materials provided with the distribution.
50 1.13 chuck * 3. Neither the name of the University nor the names of its contributors
51 1.2 yamt * may be used to endorse or promote products derived from this software
52 1.2 yamt * without specific prior written permission.
53 1.2 yamt *
54 1.2 yamt * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 1.2 yamt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 1.2 yamt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 1.2 yamt * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 1.2 yamt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 1.2 yamt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 1.2 yamt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 1.2 yamt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 1.2 yamt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 1.2 yamt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 1.2 yamt * SUCH DAMAGE.
65 1.2 yamt *
66 1.2 yamt * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
67 1.2 yamt * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
68 1.2 yamt *
69 1.2 yamt *
70 1.2 yamt * Copyright (c) 1987, 1990 Carnegie-Mellon University.
71 1.2 yamt * All rights reserved.
72 1.2 yamt *
73 1.2 yamt * Permission to use, copy, modify and distribute this software and
74 1.2 yamt * its documentation is hereby granted, provided that both the copyright
75 1.2 yamt * notice and this permission notice appear in all copies of the
76 1.2 yamt * software, derivative works or modified versions, and any portions
77 1.2 yamt * thereof, and that both notices appear in supporting documentation.
78 1.2 yamt *
79 1.2 yamt * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
80 1.2 yamt * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
81 1.2 yamt * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
82 1.2 yamt *
83 1.2 yamt * Carnegie Mellon requests users of this software to return to
84 1.2 yamt *
85 1.2 yamt * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
86 1.2 yamt * School of Computer Science
87 1.2 yamt * Carnegie Mellon University
88 1.2 yamt * Pittsburgh PA 15213-3890
89 1.2 yamt *
90 1.2 yamt * any improvements or extensions that they make and grant Carnegie the
91 1.2 yamt * rights to redistribute these changes.
92 1.2 yamt */
93 1.2 yamt
94 1.2 yamt #if defined(PDSIM)
95 1.2 yamt
96 1.2 yamt #include "pdsim.h"
97 1.2 yamt
98 1.2 yamt #else /* defined(PDSIM) */
99 1.2 yamt
100 1.2 yamt #include <sys/cdefs.h>
101 1.36 maxv __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $");
102 1.2 yamt
103 1.2 yamt #include <sys/param.h>
104 1.2 yamt #include <sys/proc.h>
105 1.2 yamt #include <sys/systm.h>
106 1.2 yamt #include <sys/kernel.h>
107 1.28 ad #include <sys/kmem.h>
108 1.29 mlelstv #include <sys/atomic.h>
109 1.2 yamt
110 1.2 yamt #include <uvm/uvm.h>
111 1.2 yamt #include <uvm/uvm_pdpolicy.h>
112 1.2 yamt #include <uvm/uvm_pdpolicy_impl.h>
113 1.18 ad #include <uvm/uvm_stat.h>
114 1.2 yamt
115 1.2 yamt #endif /* defined(PDSIM) */
116 1.2 yamt
/*
 * size, in entries, of the per-CPU queue of pending page status changes.
 * 128 entries makes for a 1kB queue on _LP64, which has been found to be
 * a reasonable compromise: lock contention events and wait times stay low
 * without using too much memory or letting the global state fall too far
 * behind.  may be overridden at build time.
 */
#ifndef CLOCK_PDQ_SIZE
#define	CLOCK_PDQ_SIZE		128
#endif /* CLOCK_PDQ_SIZE */

/* queue membership bits tested and set in pg->pqflags by this file. */
#define	PQ_INACTIVE		0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE		0x00000020	/* page is in active list */

/*
 * initial value given to the s_inactivepct parameter by uvmpdpol_init().
 * may be overridden at build time.
 */
#ifndef CLOCK_INACTIVEPCT
#define	CLOCK_INACTIVEPCT	33
#endif /* CLOCK_INACTIVEPCT */
133 1.2 yamt
134 1.2 yamt struct uvmpdpol_globalstate {
135 1.18 ad kmutex_t lock; /* lock on state */
136 1.18 ad /* <= compiler pads here */
137 1.18 ad struct pglist s_activeq /* allocated pages, in use */
138 1.18 ad __aligned(COHERENCY_UNIT);
139 1.2 yamt struct pglist s_inactiveq; /* pages between the clock hands */
140 1.2 yamt int s_active;
141 1.2 yamt int s_inactive;
142 1.2 yamt int s_inactarg;
143 1.2 yamt struct uvm_pctparam s_anonmin;
144 1.2 yamt struct uvm_pctparam s_filemin;
145 1.2 yamt struct uvm_pctparam s_execmin;
146 1.2 yamt struct uvm_pctparam s_anonmax;
147 1.2 yamt struct uvm_pctparam s_filemax;
148 1.2 yamt struct uvm_pctparam s_execmax;
149 1.2 yamt struct uvm_pctparam s_inactivepct;
150 1.2 yamt };
151 1.2 yamt
152 1.2 yamt struct uvmpdpol_scanstate {
153 1.7 thorpej bool ss_anonreact, ss_filereact, ss_execreact;
154 1.24 ad struct vm_page ss_marker;
155 1.2 yamt };
156 1.2 yamt
157 1.18 ad static void uvmpdpol_pageactivate_locked(struct vm_page *);
158 1.18 ad static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
159 1.18 ad static void uvmpdpol_pagedequeue_locked(struct vm_page *);
160 1.28 ad static bool uvmpdpol_pagerealize_locked(struct vm_page *);
161 1.28 ad static struct uvm_cpu *uvmpdpol_flush(void);
162 1.18 ad
163 1.18 ad static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
164 1.2 yamt static struct uvmpdpol_scanstate pdpol_scanstate;
165 1.2 yamt
166 1.2 yamt PDPOL_EVCNT_DEFINE(reactexec)
167 1.2 yamt PDPOL_EVCNT_DEFINE(reactfile)
168 1.2 yamt PDPOL_EVCNT_DEFINE(reactanon)
169 1.2 yamt
170 1.2 yamt static void
171 1.2 yamt clock_tune(void)
172 1.2 yamt {
173 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
174 1.2 yamt
175 1.2 yamt s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
176 1.2 yamt s->s_active + s->s_inactive);
177 1.2 yamt if (s->s_inactarg <= uvmexp.freetarg) {
178 1.2 yamt s->s_inactarg = uvmexp.freetarg + 1;
179 1.2 yamt }
180 1.2 yamt }
181 1.2 yamt
182 1.2 yamt void
183 1.2 yamt uvmpdpol_scaninit(void)
184 1.2 yamt {
185 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
186 1.2 yamt struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
187 1.2 yamt int t;
188 1.7 thorpej bool anonunder, fileunder, execunder;
189 1.7 thorpej bool anonover, fileover, execover;
190 1.7 thorpej bool anonreact, filereact, execreact;
191 1.20 ad int64_t freepg, anonpg, filepg, execpg;
192 1.2 yamt
193 1.2 yamt /*
194 1.2 yamt * decide which types of pages we want to reactivate instead of freeing
195 1.2 yamt * to keep usage within the minimum and maximum usage limits.
196 1.2 yamt */
197 1.2 yamt
198 1.20 ad cpu_count_sync_all();
199 1.27 ad freepg = uvm_availmem();
200 1.20 ad anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
201 1.20 ad filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
202 1.20 ad execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
203 1.20 ad
204 1.18 ad mutex_enter(&s->lock);
205 1.20 ad t = s->s_active + s->s_inactive + freepg;
206 1.20 ad anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
207 1.20 ad fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
208 1.20 ad execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
209 1.20 ad anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
210 1.20 ad fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
211 1.20 ad execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
212 1.2 yamt anonreact = anonunder || (!anonover && (fileover || execover));
213 1.2 yamt filereact = fileunder || (!fileover && (anonover || execover));
214 1.2 yamt execreact = execunder || (!execover && (anonover || fileover));
215 1.2 yamt if (filereact && execreact && (anonreact || uvm_swapisfull())) {
216 1.8 thorpej anonreact = filereact = execreact = false;
217 1.2 yamt }
218 1.2 yamt ss->ss_anonreact = anonreact;
219 1.2 yamt ss->ss_filereact = filereact;
220 1.2 yamt ss->ss_execreact = execreact;
221 1.24 ad memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
222 1.24 ad ss->ss_marker.flags = PG_MARKER;
223 1.24 ad TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
224 1.24 ad mutex_exit(&s->lock);
225 1.24 ad }
226 1.24 ad
227 1.24 ad void
228 1.24 ad uvmpdpol_scanfini(void)
229 1.24 ad {
230 1.24 ad struct uvmpdpol_globalstate *s = &pdpol_state;
231 1.24 ad struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
232 1.2 yamt
233 1.24 ad mutex_enter(&s->lock);
234 1.24 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
235 1.18 ad mutex_exit(&s->lock);
236 1.2 yamt }
237 1.2 yamt
238 1.2 yamt struct vm_page *
239 1.33 ad uvmpdpol_selectvictim(krwlock_t **plock)
240 1.2 yamt {
241 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
242 1.2 yamt struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
243 1.2 yamt struct vm_page *pg;
244 1.33 ad krwlock_t *lock;
245 1.2 yamt
246 1.18 ad mutex_enter(&s->lock);
247 1.2 yamt while (/* CONSTCOND */ 1) {
248 1.2 yamt struct vm_anon *anon;
249 1.2 yamt struct uvm_object *uobj;
250 1.2 yamt
251 1.24 ad pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
252 1.2 yamt if (pg == NULL) {
253 1.2 yamt break;
254 1.2 yamt }
255 1.24 ad KASSERT((pg->flags & PG_MARKER) == 0);
256 1.2 yamt uvmexp.pdscans++;
257 1.2 yamt
258 1.2 yamt /*
259 1.18 ad * acquire interlock to stablize page identity.
260 1.18 ad * if we have caught the page in a state of flux
261 1.28 ad * deal with it and retry.
262 1.2 yamt */
263 1.18 ad mutex_enter(&pg->interlock);
264 1.28 ad if (uvmpdpol_pagerealize_locked(pg)) {
265 1.28 ad mutex_exit(&pg->interlock);
266 1.28 ad continue;
267 1.2 yamt }
268 1.2 yamt
269 1.2 yamt /*
270 1.24 ad * now prepare to move on to the next page.
271 1.24 ad */
272 1.24 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
273 1.24 ad pdqueue);
274 1.24 ad TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
275 1.24 ad &ss->ss_marker, pdqueue);
276 1.24 ad
277 1.24 ad /*
278 1.2 yamt * enforce the minimum thresholds on different
279 1.2 yamt * types of memory usage. if reusing the current
280 1.2 yamt * page would reduce that type of usage below its
281 1.2 yamt * minimum, reactivate the page instead and move
282 1.2 yamt * on to the next page.
283 1.2 yamt */
284 1.18 ad anon = pg->uanon;
285 1.18 ad uobj = pg->uobject;
286 1.2 yamt if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
287 1.28 ad uvmpdpol_pageactivate_locked(pg);
288 1.18 ad mutex_exit(&pg->interlock);
289 1.2 yamt PDPOL_EVCNT_INCR(reactexec);
290 1.2 yamt continue;
291 1.2 yamt }
292 1.2 yamt if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
293 1.2 yamt !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
294 1.28 ad uvmpdpol_pageactivate_locked(pg);
295 1.18 ad mutex_exit(&pg->interlock);
296 1.2 yamt PDPOL_EVCNT_INCR(reactfile);
297 1.2 yamt continue;
298 1.2 yamt }
299 1.2 yamt if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
300 1.28 ad uvmpdpol_pageactivate_locked(pg);
301 1.18 ad mutex_exit(&pg->interlock);
302 1.2 yamt PDPOL_EVCNT_INCR(reactanon);
303 1.2 yamt continue;
304 1.2 yamt }
305 1.2 yamt
306 1.18 ad /*
307 1.18 ad * try to lock the object that owns the page.
308 1.18 ad *
309 1.18 ad * with the page interlock held, we can drop s->lock, which
310 1.18 ad * could otherwise serve as a barrier to us getting the
311 1.18 ad * object locked, because the owner of the object's lock may
312 1.18 ad * be blocked on s->lock (i.e. a deadlock).
313 1.18 ad *
314 1.18 ad * whatever happens, uvmpd_trylockowner() will release the
315 1.18 ad * interlock. with the interlock dropped we can then
316 1.18 ad * re-acquire our own lock. the order is:
317 1.18 ad *
318 1.18 ad * object -> pdpol -> interlock.
319 1.18 ad */
320 1.18 ad mutex_exit(&s->lock);
321 1.18 ad lock = uvmpd_trylockowner(pg);
322 1.18 ad /* pg->interlock now released */
323 1.18 ad mutex_enter(&s->lock);
324 1.18 ad if (lock == NULL) {
325 1.18 ad /* didn't get it - try the next page. */
326 1.18 ad continue;
327 1.18 ad }
328 1.18 ad
329 1.18 ad /*
330 1.18 ad * move referenced pages back to active queue and skip to
331 1.18 ad * next page.
332 1.18 ad */
333 1.18 ad if (pmap_is_referenced(pg)) {
334 1.28 ad mutex_enter(&pg->interlock);
335 1.18 ad uvmpdpol_pageactivate_locked(pg);
336 1.28 ad mutex_exit(&pg->interlock);
337 1.18 ad uvmexp.pdreact++;
338 1.33 ad rw_exit(lock);
339 1.18 ad continue;
340 1.18 ad }
341 1.18 ad
342 1.18 ad /* we have a potential victim. */
343 1.18 ad *plock = lock;
344 1.2 yamt break;
345 1.2 yamt }
346 1.18 ad mutex_exit(&s->lock);
347 1.2 yamt return pg;
348 1.2 yamt }
349 1.2 yamt
350 1.2 yamt void
351 1.2 yamt uvmpdpol_balancequeue(int swap_shortage)
352 1.2 yamt {
353 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
354 1.2 yamt int inactive_shortage;
355 1.24 ad struct vm_page *p, marker;
356 1.33 ad krwlock_t *lock;
357 1.2 yamt
358 1.2 yamt /*
359 1.2 yamt * we have done the scan to get free pages. now we work on meeting
360 1.2 yamt * our inactive target.
361 1.2 yamt */
362 1.2 yamt
363 1.24 ad memset(&marker, 0, sizeof(marker));
364 1.24 ad marker.flags = PG_MARKER;
365 1.24 ad
366 1.18 ad mutex_enter(&s->lock);
367 1.24 ad TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
368 1.24 ad for (;;) {
369 1.24 ad inactive_shortage =
370 1.24 ad pdpol_state.s_inactarg - pdpol_state.s_inactive;
371 1.24 ad if (inactive_shortage <= 0 && swap_shortage <= 0) {
372 1.24 ad break;
373 1.2 yamt }
374 1.24 ad p = TAILQ_NEXT(&marker, pdqueue);
375 1.24 ad if (p == NULL) {
376 1.24 ad break;
377 1.14 rmind }
378 1.24 ad KASSERT((p->flags & PG_MARKER) == 0);
379 1.14 rmind
380 1.18 ad /*
381 1.18 ad * acquire interlock to stablize page identity.
382 1.18 ad * if we have caught the page in a state of flux
383 1.28 ad * deal with it and retry.
384 1.18 ad */
385 1.18 ad mutex_enter(&p->interlock);
386 1.28 ad if (uvmpdpol_pagerealize_locked(p)) {
387 1.28 ad mutex_exit(&p->interlock);
388 1.28 ad continue;
389 1.18 ad }
390 1.24 ad
391 1.24 ad /*
392 1.24 ad * now prepare to move on to the next page.
393 1.24 ad */
394 1.24 ad TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
395 1.24 ad TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
396 1.24 ad pdqueue);
397 1.24 ad
398 1.24 ad /*
399 1.24 ad * try to lock the object that owns the page. see comments
400 1.24 ad * in uvmpdol_selectvictim().
401 1.24 ad */
402 1.24 ad mutex_exit(&s->lock);
403 1.24 ad lock = uvmpd_trylockowner(p);
404 1.24 ad /* p->interlock now released */
405 1.24 ad mutex_enter(&s->lock);
406 1.24 ad if (lock == NULL) {
407 1.24 ad /* didn't get it - try the next page. */
408 1.24 ad continue;
409 1.24 ad }
410 1.24 ad
411 1.24 ad /*
412 1.24 ad * if there's a shortage of swap slots, try to free it.
413 1.24 ad */
414 1.24 ad if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
415 1.24 ad (p->flags & PG_BUSY) == 0) {
416 1.24 ad if (uvmpd_dropswap(p)) {
417 1.24 ad swap_shortage--;
418 1.24 ad }
419 1.24 ad }
420 1.24 ad
421 1.24 ad /*
422 1.24 ad * if there's a shortage of inactive pages, deactivate.
423 1.24 ad */
424 1.24 ad if (inactive_shortage > 0) {
425 1.28 ad pmap_clear_reference(p);
426 1.28 ad mutex_enter(&p->interlock);
427 1.18 ad uvmpdpol_pagedeactivate_locked(p);
428 1.28 ad mutex_exit(&p->interlock);
429 1.2 yamt uvmexp.pddeact++;
430 1.2 yamt inactive_shortage--;
431 1.2 yamt }
432 1.33 ad rw_exit(lock);
433 1.2 yamt }
434 1.24 ad TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
435 1.18 ad mutex_exit(&s->lock);
436 1.2 yamt }
437 1.2 yamt
438 1.18 ad static void
439 1.18 ad uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
440 1.2 yamt {
441 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
442 1.2 yamt
443 1.28 ad KASSERT(mutex_owned(&s->lock));
444 1.28 ad KASSERT(mutex_owned(&pg->interlock));
445 1.28 ad KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
446 1.28 ad (PQ_INTENT_D | PQ_INTENT_SET));
447 1.14 rmind
448 1.2 yamt if (pg->pqflags & PQ_ACTIVE) {
449 1.23 ad TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
450 1.2 yamt KASSERT(pdpol_state.s_active > 0);
451 1.2 yamt pdpol_state.s_active--;
452 1.2 yamt }
453 1.2 yamt if ((pg->pqflags & PQ_INACTIVE) == 0) {
454 1.2 yamt KASSERT(pg->wire_count == 0);
455 1.23 ad TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
456 1.2 yamt pdpol_state.s_inactive++;
457 1.2 yamt }
458 1.34 ad pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
459 1.34 ad pg->pqflags |= PQ_INACTIVE;
460 1.2 yamt }
461 1.2 yamt
462 1.2 yamt void
463 1.18 ad uvmpdpol_pagedeactivate(struct vm_page *pg)
464 1.18 ad {
465 1.18 ad
466 1.35 ad KASSERT(uvm_page_owner_locked_p(pg, false));
467 1.28 ad KASSERT(mutex_owned(&pg->interlock));
468 1.28 ad
469 1.28 ad /*
470 1.28 ad * we have to clear the reference bit now, as when it comes time to
471 1.28 ad * realize the intent we won't have the object locked any more.
472 1.28 ad */
473 1.28 ad pmap_clear_reference(pg);
474 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_I);
475 1.18 ad }
476 1.18 ad
477 1.18 ad static void
478 1.18 ad uvmpdpol_pageactivate_locked(struct vm_page *pg)
479 1.2 yamt {
480 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
481 1.28 ad
482 1.28 ad KASSERT(mutex_owned(&s->lock));
483 1.28 ad KASSERT(mutex_owned(&pg->interlock));
484 1.28 ad KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
485 1.28 ad (PQ_INTENT_D | PQ_INTENT_SET));
486 1.2 yamt
487 1.18 ad uvmpdpol_pagedequeue_locked(pg);
488 1.23 ad TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
489 1.2 yamt pdpol_state.s_active++;
490 1.34 ad pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
491 1.34 ad pg->pqflags |= PQ_ACTIVE;
492 1.2 yamt }
493 1.2 yamt
494 1.2 yamt void
495 1.18 ad uvmpdpol_pageactivate(struct vm_page *pg)
496 1.18 ad {
497 1.28 ad
498 1.35 ad KASSERT(uvm_page_owner_locked_p(pg, false));
499 1.28 ad KASSERT(mutex_owned(&pg->interlock));
500 1.18 ad
501 1.31 ad uvmpdpol_set_intent(pg, PQ_INTENT_A);
502 1.18 ad }
503 1.18 ad
504 1.18 ad static void
505 1.18 ad uvmpdpol_pagedequeue_locked(struct vm_page *pg)
506 1.2 yamt {
507 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
508 1.28 ad
509 1.28 ad KASSERT(mutex_owned(&s->lock));
510 1.28 ad KASSERT(mutex_owned(&pg->interlock));
511 1.2 yamt
512 1.2 yamt if (pg->pqflags & PQ_ACTIVE) {
513 1.23 ad TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
514 1.28 ad KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
515 1.2 yamt KASSERT(pdpol_state.s_active > 0);
516 1.2 yamt pdpol_state.s_active--;
517 1.2 yamt } else if (pg->pqflags & PQ_INACTIVE) {
518 1.23 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
519 1.2 yamt KASSERT(pdpol_state.s_inactive > 0);
520 1.2 yamt pdpol_state.s_inactive--;
521 1.2 yamt }
522 1.34 ad pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
523 1.2 yamt }
524 1.2 yamt
525 1.2 yamt void
526 1.18 ad uvmpdpol_pagedequeue(struct vm_page *pg)
527 1.18 ad {
528 1.18 ad
529 1.33 ad KASSERT(uvm_page_owner_locked_p(pg, true));
530 1.28 ad KASSERT(mutex_owned(&pg->interlock));
531 1.28 ad
532 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_D);
533 1.18 ad }
534 1.18 ad
535 1.18 ad void
536 1.2 yamt uvmpdpol_pageenqueue(struct vm_page *pg)
537 1.2 yamt {
538 1.2 yamt
539 1.35 ad KASSERT(uvm_page_owner_locked_p(pg, false));
540 1.28 ad KASSERT(mutex_owned(&pg->interlock));
541 1.28 ad
542 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_E);
543 1.2 yamt }
544 1.2 yamt
/*
 * uvmpdpol_anfree: hook taking an anon; this policy does nothing here.
 */
void
uvmpdpol_anfree(struct vm_anon *an)
{

	/* nothing to do. */
}
549 1.2 yamt
550 1.7 thorpej bool
551 1.2 yamt uvmpdpol_pageisqueued_p(struct vm_page *pg)
552 1.2 yamt {
553 1.28 ad uint32_t pqflags;
554 1.2 yamt
555 1.28 ad /*
556 1.28 ad * if there's an intent set, we have to consider it. otherwise,
557 1.28 ad * return the actual state. we may be called unlocked for the
558 1.28 ad * purpose of assertions, which is safe due to the page lifecycle.
559 1.28 ad */
560 1.28 ad pqflags = atomic_load_relaxed(&pg->pqflags);
561 1.28 ad if ((pqflags & PQ_INTENT_SET) != 0) {
562 1.28 ad return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
563 1.28 ad } else {
564 1.28 ad return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
565 1.28 ad }
566 1.2 yamt }
567 1.2 yamt
568 1.2 yamt void
569 1.2 yamt uvmpdpol_estimatepageable(int *active, int *inactive)
570 1.2 yamt {
571 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
572 1.2 yamt
573 1.32 ad /*
574 1.32 ad * Don't take any locks here. This can be called from DDB, and in
575 1.32 ad * any case the numbers are stale the instant the lock is dropped,
576 1.32 ad * so it just doesn't matter.
577 1.32 ad */
578 1.2 yamt if (active) {
579 1.32 ad *active = s->s_active;
580 1.2 yamt }
581 1.2 yamt if (inactive) {
582 1.32 ad *inactive = s->s_inactive;
583 1.2 yamt }
584 1.2 yamt }
585 1.2 yamt
586 1.2 yamt #if !defined(PDSIM)
587 1.2 yamt static int
588 1.2 yamt min_check(struct uvm_pctparam *pct, int t)
589 1.2 yamt {
590 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
591 1.2 yamt int total = t;
592 1.2 yamt
593 1.2 yamt if (pct != &s->s_anonmin) {
594 1.2 yamt total += uvm_pctparam_get(&s->s_anonmin);
595 1.2 yamt }
596 1.2 yamt if (pct != &s->s_filemin) {
597 1.2 yamt total += uvm_pctparam_get(&s->s_filemin);
598 1.2 yamt }
599 1.2 yamt if (pct != &s->s_execmin) {
600 1.2 yamt total += uvm_pctparam_get(&s->s_execmin);
601 1.2 yamt }
602 1.2 yamt if (total > 95) {
603 1.2 yamt return EINVAL;
604 1.2 yamt }
605 1.2 yamt return 0;
606 1.2 yamt }
607 1.2 yamt #endif /* !defined(PDSIM) */
608 1.2 yamt
609 1.2 yamt void
610 1.2 yamt uvmpdpol_init(void)
611 1.2 yamt {
612 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
613 1.2 yamt
614 1.18 ad mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
615 1.2 yamt TAILQ_INIT(&s->s_activeq);
616 1.2 yamt TAILQ_INIT(&s->s_inactiveq);
617 1.2 yamt uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
618 1.2 yamt uvm_pctparam_init(&s->s_anonmin, 10, min_check);
619 1.2 yamt uvm_pctparam_init(&s->s_filemin, 10, min_check);
620 1.2 yamt uvm_pctparam_init(&s->s_execmin, 5, min_check);
621 1.2 yamt uvm_pctparam_init(&s->s_anonmax, 80, NULL);
622 1.2 yamt uvm_pctparam_init(&s->s_filemax, 50, NULL);
623 1.2 yamt uvm_pctparam_init(&s->s_execmax, 30, NULL);
624 1.2 yamt }
625 1.2 yamt
626 1.2 yamt void
627 1.28 ad uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
628 1.28 ad {
629 1.28 ad
630 1.28 ad ucpu->pdq =
631 1.28 ad kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
632 1.28 ad ucpu->pdqhead = CLOCK_PDQ_SIZE;
633 1.28 ad ucpu->pdqtail = CLOCK_PDQ_SIZE;
634 1.28 ad }
635 1.28 ad
/*
 * uvmpdpol_reinit: this policy has nothing to re-initialize.
 */
void
uvmpdpol_reinit(void)
{

	/* nothing to do. */
}
640 1.2 yamt
641 1.7 thorpej bool
642 1.2 yamt uvmpdpol_needsscan_p(void)
643 1.2 yamt {
644 1.2 yamt
645 1.28 ad /*
646 1.28 ad * this must be an unlocked check: can be called from interrupt.
647 1.28 ad */
648 1.17 para return pdpol_state.s_inactive < pdpol_state.s_inactarg;
649 1.2 yamt }
650 1.2 yamt
651 1.2 yamt void
652 1.2 yamt uvmpdpol_tune(void)
653 1.2 yamt {
654 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
655 1.2 yamt
656 1.18 ad mutex_enter(&s->lock);
657 1.2 yamt clock_tune();
658 1.18 ad mutex_exit(&s->lock);
659 1.2 yamt }
660 1.2 yamt
661 1.28 ad /*
662 1.30 ad * uvmpdpol_pagerealize_locked: take the intended state set on a page and
663 1.30 ad * make it real. return true if any work was done.
664 1.28 ad */
665 1.28 ad static bool
666 1.28 ad uvmpdpol_pagerealize_locked(struct vm_page *pg)
667 1.28 ad {
668 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
669 1.28 ad
670 1.28 ad KASSERT(mutex_owned(&s->lock));
671 1.28 ad KASSERT(mutex_owned(&pg->interlock));
672 1.28 ad
673 1.28 ad switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
674 1.28 ad case PQ_INTENT_A | PQ_INTENT_SET:
675 1.28 ad case PQ_INTENT_E | PQ_INTENT_SET:
676 1.28 ad uvmpdpol_pageactivate_locked(pg);
677 1.28 ad return true;
678 1.28 ad case PQ_INTENT_I | PQ_INTENT_SET:
679 1.28 ad uvmpdpol_pagedeactivate_locked(pg);
680 1.28 ad return true;
681 1.28 ad case PQ_INTENT_D | PQ_INTENT_SET:
682 1.28 ad uvmpdpol_pagedequeue_locked(pg);
683 1.28 ad return true;
684 1.28 ad default:
685 1.28 ad return false;
686 1.28 ad }
687 1.28 ad }
688 1.28 ad
689 1.28 ad /*
690 1.28 ad * uvmpdpol_flush: return the current uvm_cpu with all of its pending
691 1.28 ad * updates flushed to the global queues. this routine may block, and
692 1.28 ad * so can switch cpu. the idea is to empty to queue on whatever cpu
693 1.28 ad * we finally end up on.
694 1.28 ad */
695 1.28 ad static struct uvm_cpu *
696 1.28 ad uvmpdpol_flush(void)
697 1.28 ad {
698 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
699 1.28 ad struct uvm_cpu *ucpu;
700 1.28 ad struct vm_page *pg;
701 1.28 ad
702 1.28 ad KASSERT(kpreempt_disabled());
703 1.28 ad
704 1.28 ad mutex_enter(&s->lock);
705 1.28 ad for (;;) {
706 1.28 ad /*
707 1.28 ad * prefer scanning forwards (even though mutex_enter() is
708 1.28 ad * serializing) so as to not defeat any prefetch logic in
709 1.28 ad * the CPU. that means elsewhere enqueuing backwards, like
710 1.28 ad * a stack, but not so important there as pages are being
711 1.28 ad * added singularly.
712 1.28 ad *
713 1.28 ad * prefetch the next "struct vm_page" while working on the
714 1.28 ad * current one. this has a measurable and very positive
715 1.28 ad * effect in reducing the amount of time spent here under
716 1.28 ad * the global lock.
717 1.28 ad */
718 1.28 ad ucpu = curcpu()->ci_data.cpu_uvm;
719 1.28 ad KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
720 1.28 ad if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
721 1.28 ad break;
722 1.28 ad }
723 1.28 ad pg = ucpu->pdq[ucpu->pdqhead++];
724 1.28 ad if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
725 1.28 ad __builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
726 1.28 ad }
727 1.28 ad mutex_enter(&pg->interlock);
728 1.28 ad pg->pqflags &= ~PQ_INTENT_QUEUED;
729 1.28 ad (void)uvmpdpol_pagerealize_locked(pg);
730 1.28 ad mutex_exit(&pg->interlock);
731 1.28 ad }
732 1.28 ad mutex_exit(&s->lock);
733 1.28 ad return ucpu;
734 1.28 ad }
735 1.28 ad
736 1.28 ad /*
737 1.28 ad * uvmpdpol_pagerealize: realize any intent set on the page. in this
738 1.28 ad * implementation, that means putting the page on a per-CPU queue to be
739 1.28 ad * dealt with later.
740 1.28 ad */
741 1.28 ad void
742 1.28 ad uvmpdpol_pagerealize(struct vm_page *pg)
743 1.28 ad {
744 1.28 ad struct uvm_cpu *ucpu;
745 1.28 ad
746 1.28 ad /*
747 1.28 ad * drain the per per-CPU queue if full, then enter the page.
748 1.28 ad */
749 1.28 ad kpreempt_disable();
750 1.28 ad ucpu = curcpu()->ci_data.cpu_uvm;
751 1.28 ad if (__predict_false(ucpu->pdqhead == 0)) {
752 1.28 ad ucpu = uvmpdpol_flush();
753 1.28 ad }
754 1.28 ad ucpu->pdq[--(ucpu->pdqhead)] = pg;
755 1.28 ad kpreempt_enable();
756 1.28 ad }
757 1.28 ad
758 1.28 ad /*
759 1.28 ad * uvmpdpol_idle: called from the system idle loop. periodically purge any
760 1.28 ad * pending updates back to the global queues.
761 1.28 ad */
762 1.28 ad void
763 1.28 ad uvmpdpol_idle(struct uvm_cpu *ucpu)
764 1.28 ad {
765 1.28 ad struct uvmpdpol_globalstate *s = &pdpol_state;
766 1.28 ad struct vm_page *pg;
767 1.28 ad
768 1.28 ad KASSERT(kpreempt_disabled());
769 1.28 ad
770 1.28 ad /*
771 1.28 ad * if no pages in the queue, we have nothing to do.
772 1.28 ad */
773 1.28 ad if (ucpu->pdqhead == ucpu->pdqtail) {
774 1.36 maxv ucpu->pdqtime = getticks();
775 1.28 ad return;
776 1.28 ad }
777 1.28 ad
778 1.28 ad /*
779 1.28 ad * don't do this more than ~8 times a second as it would needlessly
780 1.28 ad * exert pressure.
781 1.28 ad */
782 1.36 maxv if (getticks() - ucpu->pdqtime < (hz >> 3)) {
783 1.28 ad return;
784 1.28 ad }
785 1.28 ad
786 1.28 ad /*
787 1.28 ad * the idle LWP can't block, so we have to try for the lock. if we
788 1.28 ad * get it, purge the per-CPU pending update queue. continually
789 1.28 ad * check for a pending resched: in that case exit immediately.
790 1.28 ad */
791 1.28 ad if (mutex_tryenter(&s->lock)) {
792 1.28 ad while (ucpu->pdqhead != ucpu->pdqtail) {
793 1.28 ad pg = ucpu->pdq[ucpu->pdqhead];
794 1.28 ad if (!mutex_tryenter(&pg->interlock)) {
795 1.28 ad break;
796 1.28 ad }
797 1.28 ad ucpu->pdqhead++;
798 1.28 ad pg->pqflags &= ~PQ_INTENT_QUEUED;
799 1.28 ad (void)uvmpdpol_pagerealize_locked(pg);
800 1.28 ad mutex_exit(&pg->interlock);
801 1.28 ad if (curcpu()->ci_want_resched) {
802 1.28 ad break;
803 1.28 ad }
804 1.28 ad }
805 1.28 ad if (ucpu->pdqhead == ucpu->pdqtail) {
806 1.36 maxv ucpu->pdqtime = getticks();
807 1.28 ad }
808 1.28 ad mutex_exit(&s->lock);
809 1.28 ad }
810 1.28 ad }
811 1.28 ad
812 1.2 yamt #if !defined(PDSIM)
813 1.2 yamt
814 1.2 yamt #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
815 1.2 yamt
816 1.2 yamt void
817 1.2 yamt uvmpdpol_sysctlsetup(void)
818 1.2 yamt {
819 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
820 1.2 yamt
821 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
822 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
823 1.2 yamt "for anonymous application data"));
824 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
825 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
826 1.11 martin "for cached file data"));
827 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
828 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
829 1.11 martin "for cached executable data"));
830 1.2 yamt
831 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
832 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
833 1.2 yamt "be reclaimed from other usage for "
834 1.2 yamt "anonymous application data"));
835 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
836 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
837 1.2 yamt "be reclaimed from other usage for cached "
838 1.2 yamt "file data"));
839 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
840 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
841 1.2 yamt "be reclaimed from other usage for cached "
842 1.2 yamt "executable data"));
843 1.2 yamt
844 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
845 1.2 yamt SYSCTL_DESCR("Percentage of inactive queue of "
846 1.2 yamt "the entire (active + inactive) queue"));
847 1.2 yamt }
848 1.2 yamt
849 1.2 yamt #endif /* !defined(PDSIM) */
850 1.2 yamt
#if defined(PDSIM)
/*
 * pdsim_dump: dump hook, built for PDSIM only.  currently a placeholder
 * that ignores its argument.
 */
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX not implemented */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */
860