uvm_pdpolicy_clock.c revision 1.33 1 1.33 ad /* $NetBSD: uvm_pdpolicy_clock.c,v 1.33 2020/02/23 15:46:43 ad Exp $ */
2 1.2 yamt /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
3 1.2 yamt
4 1.28 ad /*-
5 1.28 ad * Copyright (c) 2019 The NetBSD Foundation, Inc.
6 1.28 ad * All rights reserved.
7 1.28 ad *
8 1.28 ad * This code is derived from software contributed to The NetBSD Foundation
9 1.28 ad * by Andrew Doran.
10 1.28 ad *
11 1.28 ad * Redistribution and use in source and binary forms, with or without
12 1.28 ad * modification, are permitted provided that the following conditions
13 1.28 ad * are met:
14 1.28 ad * 1. Redistributions of source code must retain the above copyright
15 1.28 ad * notice, this list of conditions and the following disclaimer.
16 1.28 ad * 2. Redistributions in binary form must reproduce the above copyright
17 1.28 ad * notice, this list of conditions and the following disclaimer in the
18 1.28 ad * documentation and/or other materials provided with the distribution.
19 1.28 ad *
20 1.28 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 1.28 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 1.28 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 1.28 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 1.28 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 1.28 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 1.28 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 1.28 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 1.28 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 1.28 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 1.28 ad * POSSIBILITY OF SUCH DAMAGE.
31 1.28 ad */
32 1.28 ad
33 1.2 yamt /*
34 1.2 yamt * Copyright (c) 1997 Charles D. Cranor and Washington University.
35 1.2 yamt * Copyright (c) 1991, 1993, The Regents of the University of California.
36 1.2 yamt *
37 1.2 yamt * All rights reserved.
38 1.2 yamt *
39 1.2 yamt * This code is derived from software contributed to Berkeley by
40 1.2 yamt * The Mach Operating System project at Carnegie-Mellon University.
41 1.2 yamt *
42 1.2 yamt * Redistribution and use in source and binary forms, with or without
43 1.2 yamt * modification, are permitted provided that the following conditions
44 1.2 yamt * are met:
45 1.2 yamt * 1. Redistributions of source code must retain the above copyright
46 1.2 yamt * notice, this list of conditions and the following disclaimer.
47 1.2 yamt * 2. Redistributions in binary form must reproduce the above copyright
48 1.2 yamt * notice, this list of conditions and the following disclaimer in the
49 1.2 yamt * documentation and/or other materials provided with the distribution.
50 1.13 chuck * 3. Neither the name of the University nor the names of its contributors
51 1.2 yamt * may be used to endorse or promote products derived from this software
52 1.2 yamt * without specific prior written permission.
53 1.2 yamt *
54 1.2 yamt * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 1.2 yamt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 1.2 yamt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 1.2 yamt * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 1.2 yamt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 1.2 yamt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 1.2 yamt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 1.2 yamt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 1.2 yamt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 1.2 yamt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 1.2 yamt * SUCH DAMAGE.
65 1.2 yamt *
66 1.2 yamt * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
67 1.2 yamt * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
68 1.2 yamt *
69 1.2 yamt *
70 1.2 yamt * Copyright (c) 1987, 1990 Carnegie-Mellon University.
71 1.2 yamt * All rights reserved.
72 1.2 yamt *
73 1.2 yamt * Permission to use, copy, modify and distribute this software and
74 1.2 yamt * its documentation is hereby granted, provided that both the copyright
75 1.2 yamt * notice and this permission notice appear in all copies of the
76 1.2 yamt * software, derivative works or modified versions, and any portions
77 1.2 yamt * thereof, and that both notices appear in supporting documentation.
78 1.2 yamt *
79 1.2 yamt * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
80 1.2 yamt * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
81 1.2 yamt * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
82 1.2 yamt *
83 1.2 yamt * Carnegie Mellon requests users of this software to return to
84 1.2 yamt *
85 1.2 yamt * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
86 1.2 yamt * School of Computer Science
87 1.2 yamt * Carnegie Mellon University
88 1.2 yamt * Pittsburgh PA 15213-3890
89 1.2 yamt *
90 1.2 yamt * any improvements or extensions that they make and grant Carnegie the
91 1.2 yamt * rights to redistribute these changes.
92 1.2 yamt */
93 1.2 yamt
94 1.2 yamt #if defined(PDSIM)
95 1.2 yamt
96 1.2 yamt #include "pdsim.h"
97 1.2 yamt
98 1.2 yamt #else /* defined(PDSIM) */
99 1.2 yamt
100 1.2 yamt #include <sys/cdefs.h>
101 1.33 ad __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.33 2020/02/23 15:46:43 ad Exp $");
102 1.2 yamt
103 1.2 yamt #include <sys/param.h>
104 1.2 yamt #include <sys/proc.h>
105 1.2 yamt #include <sys/systm.h>
106 1.2 yamt #include <sys/kernel.h>
107 1.28 ad #include <sys/kmem.h>
108 1.29 mlelstv #include <sys/atomic.h>
109 1.2 yamt
110 1.2 yamt #include <uvm/uvm.h>
111 1.2 yamt #include <uvm/uvm_pdpolicy.h>
112 1.2 yamt #include <uvm/uvm_pdpolicy_impl.h>
113 1.18 ad #include <uvm/uvm_stat.h>
114 1.2 yamt
115 1.2 yamt #endif /* defined(PDSIM) */
116 1.2 yamt
/*
 * Number of entries in each per-CPU queue of pending page status changes.
 * The default of 128 entries makes for a 1kB queue on _LP64, which has
 * been found to be a reasonable compromise: it keeps lock contention
 * events and wait times low, without using too much memory or letting
 * the global state fall too far behind.  May be overridden at build time.
 */
#ifndef CLOCK_PDQ_SIZE
#define	CLOCK_PDQ_SIZE		128
#endif /* CLOCK_PDQ_SIZE */

/* queue membership bits, kept in the page's pqflags. */
#define	PQ_INACTIVE		0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE		0x00000020	/* page is in active list */

/*
 * Initial value of the "inactivepct" tunable: the percentage of the
 * (active + inactive) pages that the inactive queue should hold.  May be
 * overridden at build time.
 */
#ifndef CLOCK_INACTIVEPCT
#define	CLOCK_INACTIVEPCT	33
#endif /* CLOCK_INACTIVEPCT */
133 1.2 yamt
134 1.2 yamt struct uvmpdpol_globalstate {
135 1.18 ad kmutex_t lock; /* lock on state */
136 1.18 ad /* <= compiler pads here */
137 1.18 ad struct pglist s_activeq /* allocated pages, in use */
138 1.18 ad __aligned(COHERENCY_UNIT);
139 1.2 yamt struct pglist s_inactiveq; /* pages between the clock hands */
140 1.2 yamt int s_active;
141 1.2 yamt int s_inactive;
142 1.2 yamt int s_inactarg;
143 1.2 yamt struct uvm_pctparam s_anonmin;
144 1.2 yamt struct uvm_pctparam s_filemin;
145 1.2 yamt struct uvm_pctparam s_execmin;
146 1.2 yamt struct uvm_pctparam s_anonmax;
147 1.2 yamt struct uvm_pctparam s_filemax;
148 1.2 yamt struct uvm_pctparam s_execmax;
149 1.2 yamt struct uvm_pctparam s_inactivepct;
150 1.2 yamt };
151 1.2 yamt
152 1.2 yamt struct uvmpdpol_scanstate {
153 1.7 thorpej bool ss_anonreact, ss_filereact, ss_execreact;
154 1.24 ad struct vm_page ss_marker;
155 1.2 yamt };
156 1.2 yamt
157 1.18 ad static void uvmpdpol_pageactivate_locked(struct vm_page *);
158 1.18 ad static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
159 1.18 ad static void uvmpdpol_pagedequeue_locked(struct vm_page *);
160 1.28 ad static bool uvmpdpol_pagerealize_locked(struct vm_page *);
161 1.28 ad static struct uvm_cpu *uvmpdpol_flush(void);
162 1.18 ad
163 1.18 ad static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
164 1.2 yamt static struct uvmpdpol_scanstate pdpol_scanstate;
165 1.2 yamt
166 1.2 yamt PDPOL_EVCNT_DEFINE(reactexec)
167 1.2 yamt PDPOL_EVCNT_DEFINE(reactfile)
168 1.2 yamt PDPOL_EVCNT_DEFINE(reactanon)
169 1.2 yamt
170 1.2 yamt static void
171 1.2 yamt clock_tune(void)
172 1.2 yamt {
173 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
174 1.2 yamt
175 1.2 yamt s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
176 1.2 yamt s->s_active + s->s_inactive);
177 1.2 yamt if (s->s_inactarg <= uvmexp.freetarg) {
178 1.2 yamt s->s_inactarg = uvmexp.freetarg + 1;
179 1.2 yamt }
180 1.2 yamt }
181 1.2 yamt
182 1.2 yamt void
183 1.2 yamt uvmpdpol_scaninit(void)
184 1.2 yamt {
185 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
186 1.2 yamt struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
187 1.2 yamt int t;
188 1.7 thorpej bool anonunder, fileunder, execunder;
189 1.7 thorpej bool anonover, fileover, execover;
190 1.7 thorpej bool anonreact, filereact, execreact;
191 1.20 ad int64_t freepg, anonpg, filepg, execpg;
192 1.2 yamt
193 1.2 yamt /*
194 1.2 yamt * decide which types of pages we want to reactivate instead of freeing
195 1.2 yamt * to keep usage within the minimum and maximum usage limits.
196 1.2 yamt */
197 1.2 yamt
198 1.20 ad cpu_count_sync_all();
199 1.27 ad freepg = uvm_availmem();
200 1.20 ad anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
201 1.20 ad filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
202 1.20 ad execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
203 1.20 ad
204 1.18 ad mutex_enter(&s->lock);
205 1.20 ad t = s->s_active + s->s_inactive + freepg;
206 1.20 ad anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
207 1.20 ad fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
208 1.20 ad execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
209 1.20 ad anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
210 1.20 ad fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
211 1.20 ad execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
212 1.2 yamt anonreact = anonunder || (!anonover && (fileover || execover));
213 1.2 yamt filereact = fileunder || (!fileover && (anonover || execover));
214 1.2 yamt execreact = execunder || (!execover && (anonover || fileover));
215 1.2 yamt if (filereact && execreact && (anonreact || uvm_swapisfull())) {
216 1.8 thorpej anonreact = filereact = execreact = false;
217 1.2 yamt }
218 1.2 yamt ss->ss_anonreact = anonreact;
219 1.2 yamt ss->ss_filereact = filereact;
220 1.2 yamt ss->ss_execreact = execreact;
221 1.24 ad memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
222 1.24 ad ss->ss_marker.flags = PG_MARKER;
223 1.24 ad TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
224 1.24 ad mutex_exit(&s->lock);
225 1.24 ad }
226 1.24 ad
227 1.24 ad void
228 1.24 ad uvmpdpol_scanfini(void)
229 1.24 ad {
230 1.24 ad struct uvmpdpol_globalstate *s = &pdpol_state;
231 1.24 ad struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
232 1.2 yamt
233 1.24 ad mutex_enter(&s->lock);
234 1.24 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
235 1.18 ad mutex_exit(&s->lock);
236 1.2 yamt }
237 1.2 yamt
238 1.2 yamt struct vm_page *
239 1.33 ad uvmpdpol_selectvictim(krwlock_t **plock)
240 1.2 yamt {
241 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
242 1.2 yamt struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
243 1.2 yamt struct vm_page *pg;
244 1.33 ad krwlock_t *lock;
245 1.2 yamt
246 1.18 ad mutex_enter(&s->lock);
247 1.2 yamt while (/* CONSTCOND */ 1) {
248 1.2 yamt struct vm_anon *anon;
249 1.2 yamt struct uvm_object *uobj;
250 1.2 yamt
251 1.24 ad pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
252 1.2 yamt if (pg == NULL) {
253 1.2 yamt break;
254 1.2 yamt }
255 1.24 ad KASSERT((pg->flags & PG_MARKER) == 0);
256 1.2 yamt uvmexp.pdscans++;
257 1.2 yamt
258 1.2 yamt /*
259 1.18 ad * acquire interlock to stablize page identity.
260 1.18 ad * if we have caught the page in a state of flux
261 1.28 ad * deal with it and retry.
262 1.2 yamt */
263 1.18 ad mutex_enter(&pg->interlock);
264 1.28 ad if (uvmpdpol_pagerealize_locked(pg)) {
265 1.28 ad mutex_exit(&pg->interlock);
266 1.28 ad continue;
267 1.2 yamt }
268 1.2 yamt
269 1.2 yamt /*
270 1.24 ad * now prepare to move on to the next page.
271 1.24 ad */
272 1.24 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
273 1.24 ad pdqueue);
274 1.24 ad TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
275 1.24 ad &ss->ss_marker, pdqueue);
276 1.24 ad
277 1.24 ad /*
278 1.2 yamt * enforce the minimum thresholds on different
279 1.2 yamt * types of memory usage. if reusing the current
280 1.2 yamt * page would reduce that type of usage below its
281 1.2 yamt * minimum, reactivate the page instead and move
282 1.2 yamt * on to the next page.
283 1.2 yamt */
284 1.18 ad anon = pg->uanon;
285 1.18 ad uobj = pg->uobject;
286 1.2 yamt if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
287 1.28 ad uvmpdpol_pageactivate_locked(pg);
288 1.18 ad mutex_exit(&pg->interlock);
289 1.2 yamt PDPOL_EVCNT_INCR(reactexec);
290 1.2 yamt continue;
291 1.2 yamt }
292 1.2 yamt if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
293 1.2 yamt !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
294 1.28 ad uvmpdpol_pageactivate_locked(pg);
295 1.18 ad mutex_exit(&pg->interlock);
296 1.2 yamt PDPOL_EVCNT_INCR(reactfile);
297 1.2 yamt continue;
298 1.2 yamt }
299 1.2 yamt if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
300 1.28 ad uvmpdpol_pageactivate_locked(pg);
301 1.18 ad mutex_exit(&pg->interlock);
302 1.2 yamt PDPOL_EVCNT_INCR(reactanon);
303 1.2 yamt continue;
304 1.2 yamt }
305 1.2 yamt
306 1.18 ad /*
307 1.18 ad * try to lock the object that owns the page.
308 1.18 ad *
309 1.18 ad * with the page interlock held, we can drop s->lock, which
310 1.18 ad * could otherwise serve as a barrier to us getting the
311 1.18 ad * object locked, because the owner of the object's lock may
312 1.18 ad * be blocked on s->lock (i.e. a deadlock).
313 1.18 ad *
314 1.18 ad * whatever happens, uvmpd_trylockowner() will release the
315 1.18 ad * interlock. with the interlock dropped we can then
316 1.18 ad * re-acquire our own lock. the order is:
317 1.18 ad *
318 1.18 ad * object -> pdpol -> interlock.
319 1.18 ad */
320 1.18 ad mutex_exit(&s->lock);
321 1.18 ad lock = uvmpd_trylockowner(pg);
322 1.18 ad /* pg->interlock now released */
323 1.18 ad mutex_enter(&s->lock);
324 1.18 ad if (lock == NULL) {
325 1.18 ad /* didn't get it - try the next page. */
326 1.18 ad continue;
327 1.18 ad }
328 1.18 ad
329 1.18 ad /*
330 1.18 ad * move referenced pages back to active queue and skip to
331 1.18 ad * next page.
332 1.18 ad */
333 1.18 ad if (pmap_is_referenced(pg)) {
334 1.28 ad mutex_enter(&pg->interlock);
335 1.18 ad uvmpdpol_pageactivate_locked(pg);
336 1.28 ad mutex_exit(&pg->interlock);
337 1.18 ad uvmexp.pdreact++;
338 1.33 ad rw_exit(lock);
339 1.18 ad continue;
340 1.18 ad }
341 1.18 ad
342 1.18 ad /* we have a potential victim. */
343 1.18 ad *plock = lock;
344 1.2 yamt break;
345 1.2 yamt }
346 1.18 ad mutex_exit(&s->lock);
347 1.2 yamt return pg;
348 1.2 yamt }
349 1.2 yamt
350 1.2 yamt void
351 1.2 yamt uvmpdpol_balancequeue(int swap_shortage)
352 1.2 yamt {
353 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
354 1.2 yamt int inactive_shortage;
355 1.24 ad struct vm_page *p, marker;
356 1.33 ad krwlock_t *lock;
357 1.2 yamt
358 1.2 yamt /*
359 1.2 yamt * we have done the scan to get free pages. now we work on meeting
360 1.2 yamt * our inactive target.
361 1.2 yamt */
362 1.2 yamt
363 1.24 ad memset(&marker, 0, sizeof(marker));
364 1.24 ad marker.flags = PG_MARKER;
365 1.24 ad
366 1.18 ad mutex_enter(&s->lock);
367 1.24 ad TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
368 1.24 ad for (;;) {
369 1.24 ad inactive_shortage =
370 1.24 ad pdpol_state.s_inactarg - pdpol_state.s_inactive;
371 1.24 ad if (inactive_shortage <= 0 && swap_shortage <= 0) {
372 1.24 ad break;
373 1.2 yamt }
374 1.24 ad p = TAILQ_NEXT(&marker, pdqueue);
375 1.24 ad if (p == NULL) {
376 1.24 ad break;
377 1.14 rmind }
378 1.24 ad KASSERT((p->flags & PG_MARKER) == 0);
379 1.14 rmind
380 1.18 ad /*
381 1.18 ad * acquire interlock to stablize page identity.
382 1.18 ad * if we have caught the page in a state of flux
383 1.28 ad * deal with it and retry.
384 1.18 ad */
385 1.18 ad mutex_enter(&p->interlock);
386 1.28 ad if (uvmpdpol_pagerealize_locked(p)) {
387 1.28 ad mutex_exit(&p->interlock);
388 1.28 ad continue;
389 1.18 ad }
390 1.24 ad
391 1.24 ad /*
392 1.24 ad * now prepare to move on to the next page.
393 1.24 ad */
394 1.24 ad TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
395 1.24 ad TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
396 1.24 ad pdqueue);
397 1.24 ad
398 1.24 ad /*
399 1.24 ad * try to lock the object that owns the page. see comments
400 1.24 ad * in uvmpdol_selectvictim().
401 1.24 ad */
402 1.24 ad mutex_exit(&s->lock);
403 1.24 ad lock = uvmpd_trylockowner(p);
404 1.24 ad /* p->interlock now released */
405 1.24 ad mutex_enter(&s->lock);
406 1.24 ad if (lock == NULL) {
407 1.24 ad /* didn't get it - try the next page. */
408 1.24 ad continue;
409 1.24 ad }
410 1.24 ad
411 1.24 ad /*
412 1.24 ad * if there's a shortage of swap slots, try to free it.
413 1.24 ad */
414 1.24 ad if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
415 1.24 ad (p->flags & PG_BUSY) == 0) {
416 1.24 ad if (uvmpd_dropswap(p)) {
417 1.24 ad swap_shortage--;
418 1.24 ad }
419 1.24 ad }
420 1.24 ad
421 1.24 ad /*
422 1.24 ad * if there's a shortage of inactive pages, deactivate.
423 1.24 ad */
424 1.24 ad if (inactive_shortage > 0) {
425 1.28 ad pmap_clear_reference(p);
426 1.28 ad mutex_enter(&p->interlock);
427 1.18 ad uvmpdpol_pagedeactivate_locked(p);
428 1.28 ad mutex_exit(&p->interlock);
429 1.2 yamt uvmexp.pddeact++;
430 1.2 yamt inactive_shortage--;
431 1.2 yamt }
432 1.33 ad rw_exit(lock);
433 1.2 yamt }
434 1.24 ad TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
435 1.18 ad mutex_exit(&s->lock);
436 1.2 yamt }
437 1.2 yamt
438 1.18 ad static void
439 1.18 ad uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
440 1.2 yamt {
441 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
442 1.2 yamt
443 1.28 ad KASSERT(mutex_owned(&s->lock));
444 1.28 ad KASSERT(mutex_owned(&pg->interlock));
445 1.28 ad KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
446 1.28 ad (PQ_INTENT_D | PQ_INTENT_SET));
447 1.14 rmind
448 1.2 yamt if (pg->pqflags & PQ_ACTIVE) {
449 1.23 ad TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
450 1.2 yamt KASSERT(pdpol_state.s_active > 0);
451 1.2 yamt pdpol_state.s_active--;
452 1.2 yamt }
453 1.2 yamt if ((pg->pqflags & PQ_INACTIVE) == 0) {
454 1.2 yamt KASSERT(pg->wire_count == 0);
455 1.23 ad TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
456 1.2 yamt pdpol_state.s_inactive++;
457 1.2 yamt }
458 1.28 ad pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_INACTIVE;
459 1.2 yamt }
460 1.2 yamt
461 1.2 yamt void
462 1.18 ad uvmpdpol_pagedeactivate(struct vm_page *pg)
463 1.18 ad {
464 1.18 ad
465 1.33 ad KASSERT(uvm_page_owner_locked_p(pg, true));
466 1.28 ad KASSERT(mutex_owned(&pg->interlock));
467 1.28 ad
468 1.28 ad /*
469 1.28 ad * we have to clear the reference bit now, as when it comes time to
470 1.28 ad * realize the intent we won't have the object locked any more.
471 1.28 ad */
472 1.28 ad pmap_clear_reference(pg);
473 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_I);
474 1.18 ad }
475 1.18 ad
476 1.18 ad static void
477 1.18 ad uvmpdpol_pageactivate_locked(struct vm_page *pg)
478 1.2 yamt {
479 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
480 1.28 ad
481 1.28 ad KASSERT(mutex_owned(&s->lock));
482 1.28 ad KASSERT(mutex_owned(&pg->interlock));
483 1.28 ad KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
484 1.28 ad (PQ_INTENT_D | PQ_INTENT_SET));
485 1.2 yamt
486 1.18 ad uvmpdpol_pagedequeue_locked(pg);
487 1.23 ad TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
488 1.2 yamt pdpol_state.s_active++;
489 1.31 ad pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_ACTIVE;
490 1.2 yamt }
491 1.2 yamt
492 1.2 yamt void
493 1.18 ad uvmpdpol_pageactivate(struct vm_page *pg)
494 1.18 ad {
495 1.28 ad
496 1.33 ad KASSERT(uvm_page_owner_locked_p(pg, true));
497 1.28 ad KASSERT(mutex_owned(&pg->interlock));
498 1.18 ad
499 1.31 ad uvmpdpol_set_intent(pg, PQ_INTENT_A);
500 1.18 ad }
501 1.18 ad
502 1.18 ad static void
503 1.18 ad uvmpdpol_pagedequeue_locked(struct vm_page *pg)
504 1.2 yamt {
505 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
506 1.28 ad
507 1.28 ad KASSERT(mutex_owned(&s->lock));
508 1.28 ad KASSERT(mutex_owned(&pg->interlock));
509 1.2 yamt
510 1.2 yamt if (pg->pqflags & PQ_ACTIVE) {
511 1.23 ad TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
512 1.28 ad KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
513 1.2 yamt KASSERT(pdpol_state.s_active > 0);
514 1.2 yamt pdpol_state.s_active--;
515 1.2 yamt } else if (pg->pqflags & PQ_INACTIVE) {
516 1.23 ad TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
517 1.2 yamt KASSERT(pdpol_state.s_inactive > 0);
518 1.2 yamt pdpol_state.s_inactive--;
519 1.2 yamt }
520 1.28 ad pg->pqflags &= PQ_INTENT_QUEUED;
521 1.2 yamt }
522 1.2 yamt
523 1.2 yamt void
524 1.18 ad uvmpdpol_pagedequeue(struct vm_page *pg)
525 1.18 ad {
526 1.18 ad
527 1.33 ad KASSERT(uvm_page_owner_locked_p(pg, true));
528 1.28 ad KASSERT(mutex_owned(&pg->interlock));
529 1.28 ad
530 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_D);
531 1.18 ad }
532 1.18 ad
533 1.18 ad void
534 1.2 yamt uvmpdpol_pageenqueue(struct vm_page *pg)
535 1.2 yamt {
536 1.2 yamt
537 1.33 ad KASSERT(uvm_page_owner_locked_p(pg, true));
538 1.28 ad KASSERT(mutex_owned(&pg->interlock));
539 1.28 ad
540 1.28 ad uvmpdpol_set_intent(pg, PQ_INTENT_E);
541 1.2 yamt }
542 1.2 yamt
/*
 * uvmpdpol_anfree: hook taking an anon; this policy does nothing here.
 */
void
uvmpdpol_anfree(struct vm_anon *an)
{

	/* nothing to do. */
}
547 1.2 yamt
548 1.7 thorpej bool
549 1.2 yamt uvmpdpol_pageisqueued_p(struct vm_page *pg)
550 1.2 yamt {
551 1.28 ad uint32_t pqflags;
552 1.2 yamt
553 1.28 ad /*
554 1.28 ad * if there's an intent set, we have to consider it. otherwise,
555 1.28 ad * return the actual state. we may be called unlocked for the
556 1.28 ad * purpose of assertions, which is safe due to the page lifecycle.
557 1.28 ad */
558 1.28 ad pqflags = atomic_load_relaxed(&pg->pqflags);
559 1.28 ad if ((pqflags & PQ_INTENT_SET) != 0) {
560 1.28 ad return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
561 1.28 ad } else {
562 1.28 ad return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
563 1.28 ad }
564 1.2 yamt }
565 1.2 yamt
566 1.2 yamt void
567 1.2 yamt uvmpdpol_estimatepageable(int *active, int *inactive)
568 1.2 yamt {
569 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
570 1.2 yamt
571 1.32 ad /*
572 1.32 ad * Don't take any locks here. This can be called from DDB, and in
573 1.32 ad * any case the numbers are stale the instant the lock is dropped,
574 1.32 ad * so it just doesn't matter.
575 1.32 ad */
576 1.2 yamt if (active) {
577 1.32 ad *active = s->s_active;
578 1.2 yamt }
579 1.2 yamt if (inactive) {
580 1.32 ad *inactive = s->s_inactive;
581 1.2 yamt }
582 1.2 yamt }
583 1.2 yamt
584 1.2 yamt #if !defined(PDSIM)
585 1.2 yamt static int
586 1.2 yamt min_check(struct uvm_pctparam *pct, int t)
587 1.2 yamt {
588 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
589 1.2 yamt int total = t;
590 1.2 yamt
591 1.2 yamt if (pct != &s->s_anonmin) {
592 1.2 yamt total += uvm_pctparam_get(&s->s_anonmin);
593 1.2 yamt }
594 1.2 yamt if (pct != &s->s_filemin) {
595 1.2 yamt total += uvm_pctparam_get(&s->s_filemin);
596 1.2 yamt }
597 1.2 yamt if (pct != &s->s_execmin) {
598 1.2 yamt total += uvm_pctparam_get(&s->s_execmin);
599 1.2 yamt }
600 1.2 yamt if (total > 95) {
601 1.2 yamt return EINVAL;
602 1.2 yamt }
603 1.2 yamt return 0;
604 1.2 yamt }
605 1.2 yamt #endif /* !defined(PDSIM) */
606 1.2 yamt
607 1.2 yamt void
608 1.2 yamt uvmpdpol_init(void)
609 1.2 yamt {
610 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
611 1.2 yamt
612 1.18 ad mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
613 1.2 yamt TAILQ_INIT(&s->s_activeq);
614 1.2 yamt TAILQ_INIT(&s->s_inactiveq);
615 1.2 yamt uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
616 1.2 yamt uvm_pctparam_init(&s->s_anonmin, 10, min_check);
617 1.2 yamt uvm_pctparam_init(&s->s_filemin, 10, min_check);
618 1.2 yamt uvm_pctparam_init(&s->s_execmin, 5, min_check);
619 1.2 yamt uvm_pctparam_init(&s->s_anonmax, 80, NULL);
620 1.2 yamt uvm_pctparam_init(&s->s_filemax, 50, NULL);
621 1.2 yamt uvm_pctparam_init(&s->s_execmax, 30, NULL);
622 1.2 yamt }
623 1.2 yamt
624 1.2 yamt void
625 1.28 ad uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
626 1.28 ad {
627 1.28 ad
628 1.28 ad ucpu->pdq =
629 1.28 ad kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
630 1.28 ad ucpu->pdqhead = CLOCK_PDQ_SIZE;
631 1.28 ad ucpu->pdqtail = CLOCK_PDQ_SIZE;
632 1.28 ad }
633 1.28 ad
/*
 * uvmpdpol_reinit: reinitialization hook; this policy does nothing here.
 */
void
uvmpdpol_reinit(void)
{

	/* nothing to do. */
}
638 1.2 yamt
639 1.7 thorpej bool
640 1.2 yamt uvmpdpol_needsscan_p(void)
641 1.2 yamt {
642 1.2 yamt
643 1.28 ad /*
644 1.28 ad * this must be an unlocked check: can be called from interrupt.
645 1.28 ad */
646 1.17 para return pdpol_state.s_inactive < pdpol_state.s_inactarg;
647 1.2 yamt }
648 1.2 yamt
649 1.2 yamt void
650 1.2 yamt uvmpdpol_tune(void)
651 1.2 yamt {
652 1.18 ad struct uvmpdpol_globalstate *s = &pdpol_state;
653 1.2 yamt
654 1.18 ad mutex_enter(&s->lock);
655 1.2 yamt clock_tune();
656 1.18 ad mutex_exit(&s->lock);
657 1.2 yamt }
658 1.2 yamt
659 1.28 ad /*
660 1.30 ad * uvmpdpol_pagerealize_locked: take the intended state set on a page and
661 1.30 ad * make it real. return true if any work was done.
662 1.28 ad */
663 1.28 ad static bool
664 1.28 ad uvmpdpol_pagerealize_locked(struct vm_page *pg)
665 1.28 ad {
666 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
667 1.28 ad
668 1.28 ad KASSERT(mutex_owned(&s->lock));
669 1.28 ad KASSERT(mutex_owned(&pg->interlock));
670 1.28 ad
671 1.28 ad switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
672 1.28 ad case PQ_INTENT_A | PQ_INTENT_SET:
673 1.28 ad case PQ_INTENT_E | PQ_INTENT_SET:
674 1.28 ad uvmpdpol_pageactivate_locked(pg);
675 1.28 ad return true;
676 1.28 ad case PQ_INTENT_I | PQ_INTENT_SET:
677 1.28 ad uvmpdpol_pagedeactivate_locked(pg);
678 1.28 ad return true;
679 1.28 ad case PQ_INTENT_D | PQ_INTENT_SET:
680 1.28 ad uvmpdpol_pagedequeue_locked(pg);
681 1.28 ad return true;
682 1.28 ad default:
683 1.28 ad return false;
684 1.28 ad }
685 1.28 ad }
686 1.28 ad
687 1.28 ad /*
688 1.28 ad * uvmpdpol_flush: return the current uvm_cpu with all of its pending
689 1.28 ad * updates flushed to the global queues. this routine may block, and
690 1.28 ad * so can switch cpu. the idea is to empty to queue on whatever cpu
691 1.28 ad * we finally end up on.
692 1.28 ad */
693 1.28 ad static struct uvm_cpu *
694 1.28 ad uvmpdpol_flush(void)
695 1.28 ad {
696 1.28 ad struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
697 1.28 ad struct uvm_cpu *ucpu;
698 1.28 ad struct vm_page *pg;
699 1.28 ad
700 1.28 ad KASSERT(kpreempt_disabled());
701 1.28 ad
702 1.28 ad mutex_enter(&s->lock);
703 1.28 ad for (;;) {
704 1.28 ad /*
705 1.28 ad * prefer scanning forwards (even though mutex_enter() is
706 1.28 ad * serializing) so as to not defeat any prefetch logic in
707 1.28 ad * the CPU. that means elsewhere enqueuing backwards, like
708 1.28 ad * a stack, but not so important there as pages are being
709 1.28 ad * added singularly.
710 1.28 ad *
711 1.28 ad * prefetch the next "struct vm_page" while working on the
712 1.28 ad * current one. this has a measurable and very positive
713 1.28 ad * effect in reducing the amount of time spent here under
714 1.28 ad * the global lock.
715 1.28 ad */
716 1.28 ad ucpu = curcpu()->ci_data.cpu_uvm;
717 1.28 ad KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
718 1.28 ad if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
719 1.28 ad break;
720 1.28 ad }
721 1.28 ad pg = ucpu->pdq[ucpu->pdqhead++];
722 1.28 ad if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
723 1.28 ad __builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
724 1.28 ad }
725 1.28 ad mutex_enter(&pg->interlock);
726 1.28 ad pg->pqflags &= ~PQ_INTENT_QUEUED;
727 1.28 ad (void)uvmpdpol_pagerealize_locked(pg);
728 1.28 ad mutex_exit(&pg->interlock);
729 1.28 ad }
730 1.28 ad mutex_exit(&s->lock);
731 1.28 ad return ucpu;
732 1.28 ad }
733 1.28 ad
734 1.28 ad /*
735 1.28 ad * uvmpdpol_pagerealize: realize any intent set on the page. in this
736 1.28 ad * implementation, that means putting the page on a per-CPU queue to be
737 1.28 ad * dealt with later.
738 1.28 ad */
739 1.28 ad void
740 1.28 ad uvmpdpol_pagerealize(struct vm_page *pg)
741 1.28 ad {
742 1.28 ad struct uvm_cpu *ucpu;
743 1.28 ad
744 1.28 ad /*
745 1.28 ad * drain the per per-CPU queue if full, then enter the page.
746 1.28 ad */
747 1.28 ad kpreempt_disable();
748 1.28 ad ucpu = curcpu()->ci_data.cpu_uvm;
749 1.28 ad if (__predict_false(ucpu->pdqhead == 0)) {
750 1.28 ad ucpu = uvmpdpol_flush();
751 1.28 ad }
752 1.28 ad ucpu->pdq[--(ucpu->pdqhead)] = pg;
753 1.28 ad kpreempt_enable();
754 1.28 ad }
755 1.28 ad
756 1.28 ad /*
757 1.28 ad * uvmpdpol_idle: called from the system idle loop. periodically purge any
758 1.28 ad * pending updates back to the global queues.
759 1.28 ad */
760 1.28 ad void
761 1.28 ad uvmpdpol_idle(struct uvm_cpu *ucpu)
762 1.28 ad {
763 1.28 ad struct uvmpdpol_globalstate *s = &pdpol_state;
764 1.28 ad struct vm_page *pg;
765 1.28 ad
766 1.28 ad KASSERT(kpreempt_disabled());
767 1.28 ad
768 1.28 ad /*
769 1.28 ad * if no pages in the queue, we have nothing to do.
770 1.28 ad */
771 1.28 ad if (ucpu->pdqhead == ucpu->pdqtail) {
772 1.28 ad ucpu->pdqtime = hardclock_ticks;
773 1.28 ad return;
774 1.28 ad }
775 1.28 ad
776 1.28 ad /*
777 1.28 ad * don't do this more than ~8 times a second as it would needlessly
778 1.28 ad * exert pressure.
779 1.28 ad */
780 1.28 ad if (hardclock_ticks - ucpu->pdqtime < (hz >> 3)) {
781 1.28 ad return;
782 1.28 ad }
783 1.28 ad
784 1.28 ad /*
785 1.28 ad * the idle LWP can't block, so we have to try for the lock. if we
786 1.28 ad * get it, purge the per-CPU pending update queue. continually
787 1.28 ad * check for a pending resched: in that case exit immediately.
788 1.28 ad */
789 1.28 ad if (mutex_tryenter(&s->lock)) {
790 1.28 ad while (ucpu->pdqhead != ucpu->pdqtail) {
791 1.28 ad pg = ucpu->pdq[ucpu->pdqhead];
792 1.28 ad if (!mutex_tryenter(&pg->interlock)) {
793 1.28 ad break;
794 1.28 ad }
795 1.28 ad ucpu->pdqhead++;
796 1.28 ad pg->pqflags &= ~PQ_INTENT_QUEUED;
797 1.28 ad (void)uvmpdpol_pagerealize_locked(pg);
798 1.28 ad mutex_exit(&pg->interlock);
799 1.28 ad if (curcpu()->ci_want_resched) {
800 1.28 ad break;
801 1.28 ad }
802 1.28 ad }
803 1.28 ad if (ucpu->pdqhead == ucpu->pdqtail) {
804 1.28 ad ucpu->pdqtime = hardclock_ticks;
805 1.28 ad }
806 1.28 ad mutex_exit(&s->lock);
807 1.28 ad }
808 1.28 ad }
809 1.28 ad
810 1.2 yamt #if !defined(PDSIM)
811 1.2 yamt
812 1.2 yamt #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
813 1.2 yamt
814 1.2 yamt void
815 1.2 yamt uvmpdpol_sysctlsetup(void)
816 1.2 yamt {
817 1.2 yamt struct uvmpdpol_globalstate *s = &pdpol_state;
818 1.2 yamt
819 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
820 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
821 1.2 yamt "for anonymous application data"));
822 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
823 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
824 1.11 martin "for cached file data"));
825 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
826 1.2 yamt SYSCTL_DESCR("Percentage of physical memory reserved "
827 1.11 martin "for cached executable data"));
828 1.2 yamt
829 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
830 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
831 1.2 yamt "be reclaimed from other usage for "
832 1.2 yamt "anonymous application data"));
833 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
834 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
835 1.2 yamt "be reclaimed from other usage for cached "
836 1.2 yamt "file data"));
837 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
838 1.2 yamt SYSCTL_DESCR("Percentage of physical memory which will "
839 1.2 yamt "be reclaimed from other usage for cached "
840 1.2 yamt "executable data"));
841 1.2 yamt
842 1.2 yamt uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
843 1.2 yamt SYSCTL_DESCR("Percentage of inactive queue of "
844 1.2 yamt "the entire (active + inactive) queue"));
845 1.2 yamt }
846 1.2 yamt
847 1.2 yamt #endif /* !defined(PDSIM) */
848 1.2 yamt
849 1.2 yamt #if defined(PDSIM)
/*
 * pdsim_dump: dump hook for the PDSIM build; nothing is dumped by this
 * policy.
 */
void
pdsim_dump(const char *id)
{

#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
857 1.2 yamt #endif /* defined(PDSIM) */
858