/* $NetBSD: uvm_pglist.c,v 1.84 2020/06/11 22:25:51 ad Exp $ */

/*-
 * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.84 2020/06/11 22:25:51 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define STAT_INCR(v)    (v)++
#define STAT_DECR(v)    do { \
                if ((v) == 0) \
                        printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
                else \
                        (v)--; \
        } while (/*CONSTCOND*/ 0)
u_long  uvm_pglistalloc_npages;
#else
#define STAT_INCR(v)
#define STAT_DECR(v)
#endif

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *    that could be used(?)
 * => params:
 *      size            the size of the allocation, rounded to page size.
 *      low             the low address of the allowed allocation range.
 *      high            the high address of the allowed allocation range.
 *      alignment       memory must be aligned to this power-of-two boundary.
 *      boundary        no segment in the allocation may cross this
 *                      power-of-two boundary (relative to zero).
 */
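
/*
 * An illustrative sketch of a caller (the specific sizes, addresses and
 * arguments here are hypothetical): to get four physically contiguous,
 * 64KB-aligned pages from the low 16MB of RAM, one might write
 *
 *      struct pglist mlist;
 *      struct vm_page *pg;
 *      int error;
 *
 *      error = uvm_pglistalloc(4 * PAGE_SIZE, 0, 0x1000000,
 *          0x10000, 0, &mlist, 1, 1);
 *      if (error == 0) {
 *              TAILQ_FOREACH(pg, &mlist, pageq.queue)
 *                      ...use VM_PAGE_TO_PHYS(pg)...
 *              uvm_pglistfree(&mlist);
 *      }
 *
 * Passing nsegs == 1 together with a non-default alignment selects the
 * contiguous allocator below.
 */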

static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
        struct pgfreelist *pgfl;
        struct pgflbucket *pgb;

        pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
        pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)];

#ifdef UVMDEBUG
        struct vm_page *tp;
        LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) {
                if (tp == pg)
                        break;
        }
        if (tp == NULL)
                panic("uvm_pglistalloc: page not on freelist");
#endif
        LIST_REMOVE(pg, pageq.list);
        pgb->pgb_nfree--;
        CPU_COUNT(CPU_COUNT_FREEPAGES, -1);
        if (pg->flags & PG_ZERO)
                CPU_COUNT(CPU_COUNT_ZEROPAGES, -1);
        pg->flags = PG_CLEAN;
        pg->uobject = NULL;
        pg->uanon = NULL;
        TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
        STAT_INCR(uvm_pglistalloc_npages);
}

static int
uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
        signed int candidate, limit, candidateidx, end, idx, skip;
        int pagemask;
        bool second_pass;
#ifdef DEBUG
        paddr_t idxpa, lastidxpa;
        paddr_t cidx = 0;       /* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
        printf("pgalloc: contig %d pgs from psi %d\n", num, psi);
#endif

        low = atop(low);
        high = atop(high);
        alignment = atop(alignment);

        /*
         * Make sure that the physseg falls within the range to be
         * allocated from.
         */
        if (high <= uvm_physseg_get_avail_start(psi) || low >= uvm_physseg_get_avail_end(psi))
                return 0;

        /*
         * We start our search just after where the last allocation
         * succeeded.
         */
        candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi) +
                uvm_physseg_get_start_hint(psi)), alignment);
        limit = uimin(high, uvm_physseg_get_avail_end(psi));
        pagemask = ~((boundary >> PAGE_SHIFT) - 1);
        skip = 0;
        second_pass = false;

        for (;;) {
                bool ok = true;
                signed int cnt;

                if (candidate + num > limit) {
                        if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
                                /*
                                 * We've run past the allowable range.
                                 */
                                return 0; /* FAIL = 0 pages */
                        }
                        /*
                         * We've wrapped around the end of this segment,
                         * so restart at the beginning, but now our limit
                         * is where we started.
                         */
                        second_pass = true;
                        candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi)), alignment);
                        limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
                            uvm_physseg_get_start_hint(psi));
                        skip = 0;
                        continue;
                }
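
                /*
                 * The XOR below detects a boundary crossing: the high bits
                 * selected by pagemask differ between the first and last
                 * page of the run exactly when the run straddles a
                 * boundary.  For example, with a 64KB boundary and 4KB
                 * pages, pagemask == ~15; a run covering pages 10..17
                 * gives (10 ^ 17) & ~15 == 16, which is nonzero, so the
                 * run is pushed up to start at page 16.
                 */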
                if (boundary != 0 &&
                    ((candidate ^ (candidate + num - 1)) & pagemask) != 0) {
                        /*
                         * Region crosses boundary. Jump to the boundary
                         * just crossed and ensure alignment.
                         */
                        candidate = (candidate + num - 1) & pagemask;
                        candidate = roundup2(candidate, alignment);
                        skip = 0;
                        continue;
                }
#ifdef DEBUG
                /*
                 * Make sure this is a managed physical page.
                 */

                if (uvm_physseg_find(candidate, &cidx) != psi)
                        panic("pgalloc contig: botch1");
                if (cidx != candidate - uvm_physseg_get_start(psi))
                        panic("pgalloc contig: botch2");
                if (uvm_physseg_find(candidate + num - 1, &cidx) != psi)
                        panic("pgalloc contig: botch3");
                if (cidx != candidate - uvm_physseg_get_start(psi) + num - 1)
                        panic("pgalloc contig: botch4");
#endif
                candidateidx = candidate - uvm_physseg_get_start(psi);
                end = candidateidx + num;

                /*
                 * Found a suitable starting page.  See if the range is free.
                 */
#ifdef PGALLOC_VERBOSE
                printf("%s: psi=%d candidate=%#x end=%#x skip=%#x, align=%#"PRIxPADDR,
                    __func__, psi, candidateidx, end, skip, alignment);
#endif
                /*
                 * We start at the end and work backwards since if we find a
                 * non-free page, it makes no sense to continue.
                 *
                 * But on the plus side we have "vetted" some number of free
                 * pages.  If this iteration fails, we may be able to skip
                 * testing most of those pages again in the next pass.
                 */
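                /*
                 * For example, with num == 8 and page-sized alignment:
                 * if the scan finds a non-free page at offset 2, then
                 * offsets 3..7 are known free.  The window advances by
                 * cnt == 3 pages, so its first 5 pages are exactly the
                 * vetted ones; skip is set to 5 and the next scan only
                 * re-checks the newly exposed tail of the window.
                 */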
                for (idx = end - 1; idx >= candidateidx + skip; idx--) {
                        if (VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, idx)) == 0) {
                                ok = false;
                                break;
                        }

#ifdef DEBUG
                        if (idx > candidateidx) {
                                idxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx));
                                lastidxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx - 1));
                                if ((lastidxpa + PAGE_SIZE) != idxpa) {
                                        /*
                                         * Region not contiguous.
                                         */
                                        panic("pgalloc contig: botch5");
                                }
                                if (boundary != 0 &&
                                    ((lastidxpa ^ idxpa) & ~(boundary - 1))
                                    != 0) {
                                        /*
                                         * Region crosses boundary.
                                         */
                                        panic("pgalloc contig: botch6");
                                }
                        }
#endif
                }

                if (ok) {
                        while (skip-- > 0) {
                                KDASSERT(VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, candidateidx + skip)));
                        }
#ifdef PGALLOC_VERBOSE
                        printf(": ok\n");
#endif
                        break;
                }

#ifdef PGALLOC_VERBOSE
                printf(": non-free at %#x\n", idx - candidateidx);
#endif
                /*
                 * Count the number of pages to advance past, since any
                 * window starting earlier would still include the
                 * non-free page we just found.
                 */
                cnt = idx + 1 - candidateidx;
                /*
                 * Now round that up to the needed alignment.
                 */
                cnt = roundup2(cnt, alignment);
                /*
                 * The number of pages we can skip checking
                 * (might be 0 if cnt > num).
                 */
                skip = uimax(num - cnt, 0);
                candidate += cnt;
        }

        /*
         * we have a chunk of memory that conforms to the requested constraints.
         */
        for (idx = candidateidx; idx < end; idx++)
                uvm_pglist_add(uvm_physseg_get_pg(psi, idx), rlist);

        /*
         * the next time we need to search this segment, start after this
         * chunk of pages we just allocated.
         */
        uvm_physseg_set_start_hint(psi, candidate + num -
            uvm_physseg_get_avail_start(psi));
        KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
            uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
            "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
            candidate + num,
            uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi),
            uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi),
            uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
        printf("got %d pgs\n", num);
#endif
        return num; /* number of pages allocated */
}

static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist)
{
        int fl;
        int error;
        uvm_physseg_t psi;

        /* Default to "lose". */
        error = ENOMEM;

        /*
         * Block all memory allocation and lock the free list.
         */
        uvm_pgfl_lock();

        /* Are there even any free pages? */
        if (uvm_availmem(false) <=
            (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
                goto out;

        for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
                for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
                for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
                {
                        if (uvm_physseg_get_free_list(psi) != fl)
                                continue;

                        num -= uvm_pglistalloc_c_ps(psi, num, low, high,
                            alignment, boundary, rlist);
                        if (num == 0) {
#ifdef PGALLOC_VERBOSE
                                printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
                                    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
                                    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
                                error = 0;
                                goto out;
                        }
                }
        }

out:
        /*
         * Check to see if we need to generate some free pages by waking
         * the pagedaemon.
         */
        uvm_pgfl_unlock();
        uvm_kick_pdaemon();
        return (error);
}

static int
uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
        int todo, limit, candidate;
        struct vm_page *pg;
        bool second_pass;
#ifdef PGALLOC_VERBOSE
        printf("pgalloc: simple %d pgs from psi %zd\n", num, psi);
#endif

        KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi));
        KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi));
        KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi));
        KASSERT(uvm_physseg_get_avail_end(psi) <= uvm_physseg_get_end(psi));

        low = atop(low);
        high = atop(high);
        todo = num;
        candidate = uimax(low, uvm_physseg_get_avail_start(psi) +
            uvm_physseg_get_start_hint(psi));
        limit = uimin(high, uvm_physseg_get_avail_end(psi));
        pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
        second_pass = false;

        /*
         * Make sure that the physseg falls within the range to be
         * allocated from.
         */
        if (high <= uvm_physseg_get_avail_start(psi) ||
            low >= uvm_physseg_get_avail_end(psi))
                return 0;

again:
        for (;; candidate++, pg++) {
                if (candidate >= limit) {
                        if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
                                candidate = limit - 1;
                                break;
                        }
                        second_pass = true;
                        candidate = uimax(low, uvm_physseg_get_avail_start(psi));
                        limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
                            uvm_physseg_get_start_hint(psi));
                        pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
                        goto again;
                }
#if defined(DEBUG)
                {
                        paddr_t cidx = 0;
                        const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx);
                        KDASSERTMSG(bank == psi,
                            "uvm_physseg_find(%#x) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG,
                            candidate, bank, psi);
                        KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi),
                            "uvm_physseg_find(%#x): %#"PRIxPADDR" != off %"PRIxPADDR,
                            candidate, cidx, candidate - uvm_physseg_get_start(psi));
                }
#endif
                if (VM_PAGE_IS_FREE(pg) == 0)
                        continue;

                uvm_pglist_add(pg, rlist);
                if (--todo == 0) {
                        break;
                }
        }

        /*
         * The next time we need to search this segment,
         * start just after the pages we just allocated.
         */
        uvm_physseg_set_start_hint(psi, candidate + 1 - uvm_physseg_get_avail_start(psi));
        KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) -
            uvm_physseg_get_avail_start(psi),
            "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
            candidate + 1,
            uvm_physseg_get_start_hint(psi),
            uvm_physseg_get_start_hint(psi),
            uvm_physseg_get_avail_end(psi),
            uvm_physseg_get_avail_start(psi),
            uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
        printf("got %d pgs\n", num - todo);
#endif
        return (num - todo); /* number of pages allocated */
}

static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
        int fl, error;
        uvm_physseg_t psi;
        int count = 0;

        /* Default to "lose". */
        error = ENOMEM;

again:
        /*
         * Block all memory allocation and lock the free list.
         */
        uvm_pgfl_lock();
        count++;

        /* Are there even any free pages? */
        if (uvm_availmem(false) <=
            (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
                goto out;

        for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
                for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
                for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
                {
                        if (uvm_physseg_get_free_list(psi) != fl)
                                continue;

                        num -= uvm_pglistalloc_s_ps(psi, num, low, high, rlist);
                        if (num == 0) {
                                error = 0;
                                goto out;
                        }
                }
        }

out:
        /*
         * Check to see if we need to generate some free pages by waking
         * the pagedaemon.
         */
        uvm_pgfl_unlock();
        uvm_kick_pdaemon();

        if (error) {
                if (waitok) {
                        /* XXX perhaps some time limitation? */
#ifdef DEBUG
                        if (count == 1)
                                printf("pglistalloc waiting\n");
#endif
                        uvm_wait("pglalloc");
                        goto again;
                } else
                        uvm_pglistfree(rlist);
        }
#ifdef PGALLOC_VERBOSE
        if (!error)
                printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
                    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
                    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
        return (error);
}

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
        int num, res;

        KASSERT(!cpu_intr_p());
        KASSERT(!cpu_softintr_p());
        KASSERT((alignment & (alignment - 1)) == 0);
        KASSERT((boundary & (boundary - 1)) == 0);

        /*
         * Our allocations are always page granularity, so our alignment
         * must be, too.
         */
        if (alignment < PAGE_SIZE)
                alignment = PAGE_SIZE;
        if (boundary != 0 && boundary < size)
                return (EINVAL);
        num = atop(round_page(size));
        low = roundup2(low, alignment);

        TAILQ_INIT(rlist);

        /*
         * Turn off the caching of free pages - we need everything to be on
         * the global freelists.
         */
        uvm_pgflcache_pause();

        if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
            (boundary != 0))
                res = uvm_pglistalloc_contig(num, low, high, alignment,
                    boundary, rlist);
        else
                res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

        uvm_pgflcache_resume();

        return (res);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
        struct vm_page *pg;

        KASSERT(!cpu_intr_p());
        KASSERT(!cpu_softintr_p());

        while ((pg = TAILQ_FIRST(list)) != NULL) {
                TAILQ_REMOVE(list, pg, pageq.queue);
                uvm_pagefree(pg);
                STAT_DECR(uvm_pglistalloc_npages);
        }
}