/*	$NetBSD: uvm_pglist.c,v 1.92 2024/01/14 10:38:47 tnn Exp $	*/

/*-
 * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.92 2024/01/14 10:38:47 tnn Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

/*
 * Optional allocation statistics: STAT_INCR/STAT_DECR maintain
 * uvm_pglistalloc_npages, the count of pages currently handed out by
 * this allocator.  They compile away entirely when the kernel option
 * VM_PAGE_ALLOC_MEMORY_STATS is not set.
 */
#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

/* Serializes the "aggressive" contiguous allocator; see
 * uvm_pglistalloc_contig_aggressive().  Initialized by
 * uvm_pglistalloc_init(). */
kmutex_t uvm_pglistalloc_contig_lock;

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *    that could be used(?)
 * => params:
 *	size		the size of the allocation, rounded to page size.
 *	low		the low address of the allowed allocation range.
 *	high		the high address of the allowed allocation range.
 *	alignment	memory must be aligned to this power-of-two boundary.
 *	boundary	no segment in the allocation may cross this
 *			power-of-two boundary (relative to zero).
 */

/*
 * uvm_pglist_add: detach a free page from its freelist bucket and append
 * it to the caller's result list.
 *
 * => expects the page free lists to be locked by the caller (the LIST_REMOVE
 *    and pgb_nfree update below operate on the global freelist structures).
 * => resets the page to a clean, unowned state before queueing it.
 */
static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;

	pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
	pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)];

#ifdef UVMDEBUG
	/* Sanity: verify the page really is on its color freelist. */
	struct vm_page *tp;
	LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	LIST_REMOVE(pg, pageq.list);
	pgb->pgb_nfree--;
	CPU_COUNT(CPU_COUNT_FREEPAGES, -1);
	pg->flags = PG_CLEAN;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}

/*
 * uvm_pglistalloc_c_ps: try to grab a contiguous, aligned run of num free
 * pages from a single physical segment.
 *
 * => returns the number of pages allocated (num on success, 0 if no
 *    suitable run was found), or -1 if the segment lies entirely outside
 *    [low, high) and so was not a valid candidate at all.
 * => caller holds the page freelist lock (pages are taken via
 *    uvm_pglist_add()).
 */
static int
uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	long candidate, limit, candidateidx, end, idx;
	int skip;
	long pagemask;
	bool second_pass;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	paddr_t cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %d\n", num, psi);
#endif

	/* Work in page-frame numbers from here on. */
	low = atop(low);
	high = atop(high);

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return -1;

	/*
	 * We start our search just after where the last allocation
	 * succeeded.
	 */
	alignment = atop(alignment);
	candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi) +
		uvm_physseg_get_start_hint(psi)), alignment);
	limit = ulmin(high, uvm_physseg_get_avail_end(psi));
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);
	skip = 0;
	second_pass = false;

	for (;;) {
		bool ok = true;
		signed int cnt;

		if (candidate + num > limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
				/*
				 * We've run past the allowable range.
				 */
				return 0; /* FAIL = 0 pages*/
			}
			/*
			 * We've wrapped around the end of this segment
			 * so restart at the beginning but now our limit
			 * is where we started.
			 */
			second_pass = true;
			candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi)), alignment);
			limit = ulmin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			skip = 0;
			continue;
		}
		if (boundary != 0 &&
		    ((candidate ^ (candidate + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary. Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			candidate = (candidate + num - 1) & pagemask;
			candidate = roundup2(candidate, alignment);
			skip = 0;
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (uvm_physseg_find(candidate, &cidx) != psi)
			panic("pgalloc contig: botch1");
		if (cidx != candidate - uvm_physseg_get_start(psi))
			panic("pgalloc contig: botch2");
		if (uvm_physseg_find(candidate + num - 1, &cidx) != psi)
			panic("pgalloc contig: botch3");
		if (cidx != candidate - uvm_physseg_get_start(psi) + num - 1)
			panic("pgalloc contig: botch4");
#endif
		candidateidx = candidate - uvm_physseg_get_start(psi);
		end = candidateidx + num;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
#ifdef PGALLOC_VERBOSE
		printf("%s: psi=%d candidate=%#lx end=%#lx skip=%#x, align=%#"PRIxPADDR,
		    __func__, psi, candidateidx, end, skip, alignment);
#endif
		/*
		 * We start at the end and work backwards since if we find a
		 * non-free page, it makes no sense to continue.
		 *
		 * But on the plus side we have "vetted" some number of free
		 * pages.  If this iteration fails, we may be able to skip
		 * testing most of those pages again in the next pass.
		 */
		for (idx = end - 1; idx >= candidateidx + skip; idx--) {
			if (VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, idx)) == 0) {
				ok = false;
				break;
			}

#ifdef DEBUG
			if (idx > candidateidx) {
				idxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx));
				lastidxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx - 1));
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}

		if (ok) {
			/* Everything below candidateidx + skip was vetted
			 * on an earlier iteration; double-check under
			 * KDASSERT. */
			while (skip-- > 0) {
				KDASSERT(VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, candidateidx + skip)));
			}
#ifdef PGALLOC_VERBOSE
			printf(": ok\n");
#endif
			break;
		}

#ifdef PGALLOC_VERBOSE
		printf(": non-free at %#x\n", idx - candidateidx);
#endif
		/*
		 * count the number of pages we can advance
		 * since we know they aren't all free.
		 */
		cnt = idx + 1 - candidateidx;
		/*
		 * now round up that to the needed alignment.
		 */
		cnt = roundup2(cnt, alignment);
		/*
		 * The number of pages we can skip checking
		 * (might be 0 if cnt > num).
		 */
		skip = uimax(num - cnt, 0);
		candidate += cnt;
	}

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	for (idx = candidateidx; idx < end; idx++)
		uvm_pglist_add(uvm_physseg_get_pg(psi, idx), rlist);

	/*
	 * the next time we need to search this segment, start after this
	 * chunk of pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + num -
	    uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
	    "%lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + num,
	    uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return num; /* number of pages allocated */
}

/*
 * uvm_pglistalloc_contig_aggressive: fallback contiguous allocator.
 *
 * => allocates single pages normally (letting the pagedaemon evict as we
 *    go), marking each with PG_PGLCA and parking it on a private list,
 *    until some newly allocated page completes a run of num marked pages
 *    that satisfies the low/high/alignment/boundary constraints.
 * => returns 0 on success with the winning run moved to rlist; ENOMEM if
 *    uvm_reclaimable() says further progress is impossible.
 * => may sleep (uvm_wait); callers must be able to wait.
 * => serialized by uvm_pglistalloc_contig_lock; all leftover pages are
 *    freed before return.
 */
static int
uvm_pglistalloc_contig_aggressive(int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	struct vm_page *pg;
	struct pglist tmp;
	paddr_t pa, off, spa, amask, bmask, rlo, rhi;
	uvm_physseg_t upm;
	int error, i, run, acnt;

	/*
	 * Allocate pages the normal way and for each new page, check if
	 * the page completes a range satisfying the request.
	 * The pagedaemon will evict pages as we go and we are very likely
	 * to get compatible pages eventually.
	 */

	error = ENOMEM;
	TAILQ_INIT(&tmp);
	acnt = atop(alignment);
	amask = ~(alignment - 1);
	bmask = ~(boundary - 1);
	KASSERT(bmask <= amask);
	mutex_enter(&uvm_pglistalloc_contig_lock);
	while (uvm_reclaimable()) {
		pg = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pg == NULL) {
			uvm_wait("pglac2");
			continue;
		}
		pg->flags |= PG_PGLCA;
		TAILQ_INSERT_HEAD(&tmp, pg, pageq.queue);

		pa = VM_PAGE_TO_PHYS(pg);
		if (pa < low || pa >= high) {
			continue;
		}

		upm = uvm_physseg_find(atop(pa), &off);
		KASSERT(uvm_physseg_valid_p(upm));

		/* Aligned start of the candidate run containing pa. */
		spa = pa & amask;

		/*
		 * Look backward for at most num - 1 pages, back to
		 * the highest of:
		 *  - the first page in the physseg
		 *  - the specified low address
		 *  - num-1 pages before the one we just allocated
		 *  - the start of the boundary range containing pa
		 * all rounded up to alignment.
		 */

		rlo = roundup2(ptoa(uvm_physseg_get_avail_start(upm)), alignment);
		rlo = MAX(rlo, roundup2(low, alignment));
		rlo = MAX(rlo, roundup2(pa - ptoa(num - 1), alignment));
		if (boundary) {
			rlo = MAX(rlo, spa & bmask);
		}

		/*
		 * Look forward as far as the lowest of:
		 *  - the last page of the physseg
		 *  - the specified high address
		 *  - the boundary after pa
		 */

		rhi = ptoa(uvm_physseg_get_avail_end(upm));
		rhi = MIN(rhi, high);
		if (boundary) {
			rhi = MIN(rhi, rounddown2(pa, boundary) + boundary);
		}

		/*
		 * Make sure our range to consider is big enough.
		 */

		if (rhi - rlo < ptoa(num)) {
			continue;
		}

		run = 0;
		while (spa > rlo) {

			/*
			 * Examine pages before spa in groups of acnt.
			 * If all the pages in a group are marked then add
			 * these pages to the run.
			 */

			for (i = 0; i < acnt; i++) {
				pg = PHYS_TO_VM_PAGE(spa - alignment + ptoa(i));
				if ((pg->flags & PG_PGLCA) == 0) {
					break;
				}
			}
			if (i < acnt) {
				break;
			}
			spa -= alignment;
			run += acnt;
		}

		/*
		 * Look forward for any remaining pages.
		 */

		if (spa + ptoa(num) > rhi) {
			continue;
		}
		for (; run < num; run++) {
			pg = PHYS_TO_VM_PAGE(spa + ptoa(run));
			if ((pg->flags & PG_PGLCA) == 0) {
				break;
			}
		}
		if (run < num) {
			continue;
		}

		/*
		 * We found a match.  Move these pages from the tmp list to
		 * the caller's list.
		 */

		for (i = 0; i < num; i++) {
			pg = PHYS_TO_VM_PAGE(spa + ptoa(i));
			TAILQ_REMOVE(&tmp, pg, pageq.queue);
			pg->flags &= ~PG_PGLCA;
			TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
			STAT_INCR(uvm_pglistalloc_npages);
		}

		error = 0;
		break;
	}

	/*
	 * Free all the pages that we didn't need.
	 */

	while (!TAILQ_EMPTY(&tmp)) {
		pg = TAILQ_FIRST(&tmp);
		TAILQ_REMOVE(&tmp, pg, pageq.queue);
		pg->flags &= ~PG_PGLCA;
		uvm_pagefree(pg);
	}
	mutex_exit(&uvm_pglistalloc_contig_lock);
	return error;
}

/*
 * uvm_pglistalloc_contig: allocate num contiguous pages honoring all of
 * the low/high/alignment/boundary constraints.
 *
 * => first scans every physseg (in freelist priority order) with
 *    uvm_pglistalloc_c_ps(); if that fails and waitok is set, falls back
 *    to uvm_pglistalloc_contig_aggressive().
 * => returns 0 on success, EINVAL if no physseg overlapped the requested
 *    range at all, otherwise ENOMEM (or the aggressive allocator's error).
 */
static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int waitok)
{
	int fl;
	int error;
	uvm_physseg_t psi;

	/* Default to "lose". */
	error = ENOMEM;
	/* Did any segment overlap the requested [low, high) range? */
	bool valid = false;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
		{
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			int done = uvm_pglistalloc_c_ps(psi, num, low, high,
			    alignment, boundary, rlist);
			if (done >= 0) {
				valid = true;
				num -= done;
			}
			if (num == 0) {
#ifdef PGALLOC_VERBOSE
				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
				error = 0;
				goto out;
			}
		}
	}
	if (!valid) {
		uvm_pgfl_unlock();
		return EINVAL;
	}

out:
	uvm_pgfl_unlock();

	/*
	 * If that didn't work, try the more aggressive approach.
	 */

	if (error) {
		if (waitok) {
			error = uvm_pglistalloc_contig_aggressive(num, low, high,
			    alignment, boundary, rlist);
		} else {
			uvm_pglistfree(rlist);
			uvm_kick_pdaemon();
		}
	}
	return error;
}

/*
 * uvm_pglistalloc_s_ps: grab up to num free pages (not necessarily
 * contiguous) within [low, high) from a single physical segment.
 *
 * => returns the number of pages allocated, or -1 if the segment lies
 *    entirely outside the requested range.
 * => caller holds the page freelist lock.
 */
static int
uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo;
	long limit, candidate;
	struct vm_page *pg;
	bool second_pass;
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %d\n", num, psi);
#endif

	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi));
	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi));
	KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi));
	KASSERT(uvm_physseg_get_avail_end(psi) <= uvm_physseg_get_end(psi));

	/* Work in page-frame numbers from here on. */
	low = atop(low);
	high = atop(high);

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return -1;

	/* Resume scanning where the previous allocation left off. */
	todo = num;
	candidate = ulmax(low, uvm_physseg_get_avail_start(psi) +
	    uvm_physseg_get_start_hint(psi));
	limit = ulmin(high, uvm_physseg_get_avail_end(psi));
	pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
	second_pass = false;

again:
	for (;; candidate++, pg++) {
		if (candidate >= limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
				candidate = limit - 1;
				break;
			}
			/* Wrap around and rescan the part we skipped. */
			second_pass = true;
			candidate = ulmax(low, uvm_physseg_get_avail_start(psi));
			limit = ulmin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
			goto again;
		}
#if defined(DEBUG)
		{
			paddr_t cidx = 0;
			const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx);
			KDASSERTMSG(bank == psi,
			    "uvm_physseg_find(%#lx) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG,
			    candidate, bank, psi);
			KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi),
			    "uvm_physseg_find(%#lx): %#"PRIxPADDR" != off %"PRIxPADDR,
			    candidate, cidx, (paddr_t)candidate - uvm_physseg_get_start(psi));
		}
#endif
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0) {
			break;
		}
	}

	/*
	 * The next time we need to search this segment,
	 * start just after the pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + 1 - uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) -
	    uvm_physseg_get_avail_start(psi),
	    "%#lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + 1,
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi),
	    uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}

/*
 * uvm_pglistalloc_simple: allocate num pages anywhere in [low, high),
 * with no contiguity/alignment/boundary requirements.
 *
 * => returns 0 on success, EINVAL if no physseg overlapped the range,
 *    ENOMEM otherwise; if waitok, sleeps in uvm_wait() and retries.
 */
static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, error;
	uvm_physseg_t psi;
	int count = 0;	/* number of passes, for reference under a debugger */

	/* Default to "lose". */
	error = ENOMEM;
	/* Did any segment overlap the requested [low, high) range? */
	bool valid = false;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();
	count++;

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
		{
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			int done = uvm_pglistalloc_s_ps(psi, num, low, high,
			    rlist);
			if (done >= 0) {
				valid = true;
				num -= done;
			}
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}
	if (!valid) {
		uvm_pgfl_unlock();
		return EINVAL;
	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_pgfl_unlock();
	uvm_kick_pdaemon();

	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}

/*
 * uvm_pglistalloc: allocate a list of pages.  See the long comment near
 * the top of the file for the full contract.  Routes to the contiguous
 * allocator when the caller asked for fewer segments than pages or set
 * any alignment/boundary constraint, and to the simple allocator
 * otherwise.  Must not be called from interrupt context.
 */
int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	int num, res;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());
	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (boundary != 0 && boundary < size)
		return (EINVAL);
	num = atop(round_page(size));
	low = roundup2(low, alignment);

	TAILQ_INIT(rlist);

	/*
	 * Turn off the caching of free pages - we need everything to be on
	 * the global freelists.
	 */
	uvm_pgflcache_pause();

	if (nsegs < num || alignment != PAGE_SIZE || boundary != 0)
		res = uvm_pglistalloc_contig(num, low, high, alignment,
		    boundary, rlist, waitok);
	else
		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

	uvm_pgflcache_resume();

	return (res);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
	struct vm_page *pg;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	while ((pg = TAILQ_FIRST(list)) != NULL) {
		TAILQ_REMOVE(list, pg, pageq.queue);
		uvm_pagefree(pg);
		STAT_DECR(uvm_pglistalloc_npages);
	}
}

/*
 * uvm_pglistalloc_init: one-time module initialization; sets up the lock
 * that serializes uvm_pglistalloc_contig_aggressive().
 */
void
uvm_pglistalloc_init(void)
{

	mutex_init(&uvm_pglistalloc_contig_lock, MUTEX_DEFAULT, IPL_NONE);
}