/*	$NetBSD: uvm_pglist.c,v 1.90.4.1 2024/01/15 14:15:54 martin Exp $	*/

/*-
 * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.90.4.1 2024/01/15 14:15:54 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

kmutex_t uvm_pglistalloc_contig_lock;

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *	imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *	that could be used(?)
 * => params:
 *	size		the size of the allocation, rounded to page size.
 *	low		the low address of the allowed allocation range.
 *	high		the high address of the allowed allocation range.
 *	alignment	memory must be aligned to this power-of-two boundary.
 *	boundary	no segment in the allocation may cross this
 *			power-of-two boundary (relative to zero).
 */
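
/*
 * Illustrative sketch (not part of the original file, kept under "#if 0"
 * so it is never compiled): a typical caller allocates a physically
 * contiguous buffer with uvm_pglistalloc() and releases it again with
 * uvm_pglistfree().  The wrapper function and its parameters are
 * hypothetical.
 */
#if 0
static int
pglist_example_alloc(struct pglist *plist, psize_t len)
{
	int error;

	/*
	 * Ask for "len" bytes anywhere below 4GB, page-aligned, with no
	 * boundary restriction, as a single segment (nsegs == 1), and
	 * wait for memory if it is not immediately available.
	 */
	error = uvm_pglistalloc(len, 0, 0xffffffffUL, PAGE_SIZE, 0,
	    plist, 1, 1);
	if (error)
		return error;

	/* ... map and use the pages now linked on "plist" ... */

	uvm_pglistfree(plist);
	return 0;
}
#endif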

static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;

	pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
	pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)];

#ifdef UVMDEBUG
	struct vm_page *tp;
	LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	LIST_REMOVE(pg, pageq.list);
	pgb->pgb_nfree--;
	CPU_COUNT(CPU_COUNT_FREEPAGES, -1);
	pg->flags = PG_CLEAN;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}

static int
uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	long candidate, limit, candidateidx, end, idx;
	int skip;
	long pagemask;
	bool second_pass;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	paddr_t cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %d\n", num, psi);
#endif

	low = atop(low);
	high = atop(high);

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return -1;

	/*
	 * We start our search just after where the last allocation
	 * succeeded.
	 */
	alignment = atop(alignment);
	candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi) +
		uvm_physseg_get_start_hint(psi)), alignment);
	limit = ulmin(high, uvm_physseg_get_avail_end(psi));
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);
	skip = 0;
	second_pass = false;

	for (;;) {
		bool ok = true;
		signed int cnt;

		if (candidate + num > limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
				/*
				 * We've run past the allowable range.
				 */
				return 0; /* FAIL = 0 pages */
			}
			/*
			 * We've wrapped around the end of this segment
			 * so restart at the beginning but now our limit
			 * is where we started.
			 */
			second_pass = true;
			candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi)), alignment);
			limit = ulmin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			skip = 0;
			continue;
		}
		if (boundary != 0 &&
		    ((candidate ^ (candidate + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary. Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			candidate = (candidate + num - 1) & pagemask;
			candidate = roundup2(candidate, alignment);
			skip = 0;
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (uvm_physseg_find(candidate, &cidx) != psi)
			panic("pgalloc contig: botch1");
		if (cidx != candidate - uvm_physseg_get_start(psi))
			panic("pgalloc contig: botch2");
		if (uvm_physseg_find(candidate + num - 1, &cidx) != psi)
			panic("pgalloc contig: botch3");
		if (cidx != candidate - uvm_physseg_get_start(psi) + num - 1)
			panic("pgalloc contig: botch4");
#endif
		candidateidx = candidate - uvm_physseg_get_start(psi);
		end = candidateidx + num;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
#ifdef PGALLOC_VERBOSE
		printf("%s: psi=%d candidate=%#lx end=%#lx skip=%#x, align=%#"PRIxPADDR,
		    __func__, psi, candidateidx, end, skip, alignment);
#endif
		/*
		 * We start at the end and work backwards since if we find a
		 * non-free page, it makes no sense to continue.
		 *
		 * But on the plus side we have "vetted" some number of free
		 * pages.  If this iteration fails, we may be able to skip
		 * testing most of those pages again in the next pass.
		 */
		for (idx = end - 1; idx >= candidateidx + skip; idx--) {
			if (VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, idx)) == 0) {
				ok = false;
				break;
			}

#ifdef DEBUG
			if (idx > candidateidx) {
				idxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx));
				lastidxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx - 1));
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}

		if (ok) {
			while (skip-- > 0) {
				KDASSERT(VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, candidateidx + skip)));
			}
#ifdef PGALLOC_VERBOSE
			printf(": ok\n");
#endif
			break;
		}

#ifdef PGALLOC_VERBOSE
		printf(": non-free at %#lx\n", idx - candidateidx);
#endif
		/*
		 * count the number of pages we can advance
		 * since we know they aren't all free.
		 */
		cnt = idx + 1 - candidateidx;
		/*
		 * now round up that to the needed alignment.
		 */
		cnt = roundup2(cnt, alignment);
		/*
		 * The number of pages we can skip checking
		 * (might be 0 if cnt > num).
		 */
		skip = uimax(num - cnt, 0);
		candidate += cnt;
	}

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	for (idx = candidateidx; idx < end; idx++)
		uvm_pglist_add(uvm_physseg_get_pg(psi, idx), rlist);

	/*
	 * the next time we need to search this segment, start after this
	 * chunk of pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + num -
	    uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
	    "%lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + num,
	    uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return num; /* number of pages allocated */
}
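
/*
 * Illustrative sketch (not part of the original file, kept under "#if 0"
 * so it is never compiled): the boundary test used in the contiguous scan
 * above.  A run of "num" page frames starting at frame "candidate" stays
 * inside a single "boundary"-sized window exactly when the first and last
 * frame agree in every bit selected by pagemask.  The helper name is
 * hypothetical.
 */
#if 0
static inline bool
pglist_run_crosses_boundary(long candidate, int num, paddr_t boundary)
{
	long pagemask = ~((boundary >> PAGE_SHIFT) - 1);

	/* e.g. boundary = 64KB with 4KB pages: windows are 16 frames wide */
	return boundary != 0 &&
	    ((candidate ^ (candidate + num - 1)) & pagemask) != 0;
}
#endif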

static int
uvm_pglistalloc_contig_aggressive(int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	struct vm_page *pg;
	struct pglist tmp;
	paddr_t pa, off, spa, amask, bmask, rlo, rhi;
	uvm_physseg_t upm;
	int error, i, run, acnt;

	/*
	 * Allocate pages the normal way and for each new page, check if
	 * the page completes a range satisfying the request.
	 * The pagedaemon will evict pages as we go and we are very likely
	 * to get compatible pages eventually.
	 */

	error = ENOMEM;
	TAILQ_INIT(&tmp);
	acnt = atop(alignment);
	amask = ~(alignment - 1);
	bmask = ~(boundary - 1);
	KASSERT(bmask <= amask);
	mutex_enter(&uvm_pglistalloc_contig_lock);
	while (uvm_reclaimable()) {
		pg = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pg == NULL) {
			uvm_wait("pglac2");
			continue;
		}
		pg->flags |= PG_PGLCA;
		TAILQ_INSERT_HEAD(&tmp, pg, pageq.queue);

		pa = VM_PAGE_TO_PHYS(pg);
		if (pa < low || pa >= high) {
			continue;
		}

		upm = uvm_physseg_find(atop(pa), &off);
		KASSERT(uvm_physseg_valid_p(upm));

		spa = pa & amask;

		/*
		 * Look backward for at most num - 1 pages, back to
		 * the highest of:
		 *  - the first page in the physseg
		 *  - the specified low address
		 *  - num-1 pages before the one we just allocated
		 *  - the start of the boundary range containing pa
		 * all rounded up to alignment.
		 */

		rlo = roundup2(ptoa(uvm_physseg_get_avail_start(upm)), alignment);
		rlo = MAX(rlo, roundup2(low, alignment));
		rlo = MAX(rlo, roundup2(pa - ptoa(num - 1), alignment));
		if (boundary) {
			rlo = MAX(rlo, spa & bmask);
		}

		/*
		 * Look forward as far as the lowest of:
		 *  - the last page of the physseg
		 *  - the specified high address
		 *  - the boundary after pa
		 */

		rhi = ptoa(uvm_physseg_get_avail_end(upm));
		rhi = MIN(rhi, high);
		if (boundary) {
			rhi = MIN(rhi, rounddown2(pa, boundary) + boundary);
		}

		/*
		 * Make sure our range to consider is big enough.
		 */

		if (rhi - rlo < ptoa(num)) {
			continue;
		}

		run = 0;
		while (spa > rlo) {

			/*
			 * Examine pages before spa in groups of acnt.
			 * If all the pages in a group are marked then add
			 * these pages to the run.
			 */

			for (i = 0; i < acnt; i++) {
				pg = PHYS_TO_VM_PAGE(spa - alignment + ptoa(i));
				if ((pg->flags & PG_PGLCA) == 0) {
					break;
				}
			}
			if (i < acnt) {
				break;
			}
			spa -= alignment;
			run += acnt;
		}

		/*
		 * Look forward for any remaining pages.
		 */

		if (spa + ptoa(num) > rhi) {
			continue;
		}
		for (; run < num; run++) {
			pg = PHYS_TO_VM_PAGE(spa + ptoa(run));
			if ((pg->flags & PG_PGLCA) == 0) {
				break;
			}
		}
		if (run < num) {
			continue;
		}

		/*
		 * We found a match.  Move these pages from the tmp list to
		 * the caller's list.
		 */

		for (i = 0; i < num; i++) {
			pg = PHYS_TO_VM_PAGE(spa + ptoa(i));
			TAILQ_REMOVE(&tmp, pg, pageq.queue);
			pg->flags &= ~PG_PGLCA;
			TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
			STAT_INCR(uvm_pglistalloc_npages);
		}

		error = 0;
		break;
	}

	/*
	 * Free all the pages that we didn't need.
	 */

	while (!TAILQ_EMPTY(&tmp)) {
		pg = TAILQ_FIRST(&tmp);
		TAILQ_REMOVE(&tmp, pg, pageq.queue);
		pg->flags &= ~PG_PGLCA;
		uvm_pagefree(pg);
	}
	mutex_exit(&uvm_pglistalloc_contig_lock);
	return error;
}

static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int waitok)
{
	int fl;
	int error;
	uvm_physseg_t psi;

	/* Default to "lose". */
	error = ENOMEM;
	bool valid = false;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
		{
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			int done = uvm_pglistalloc_c_ps(psi, num, low, high,
			    alignment, boundary, rlist);
			if (done >= 0) {
				valid = true;
				num -= done;
			}
			if (num == 0) {
#ifdef PGALLOC_VERBOSE
				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
				       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
				       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
				error = 0;
				goto out;
			}
		}
	}
	if (!valid) {
		uvm_pgfl_unlock();
		return EINVAL;
	}

out:
	uvm_pgfl_unlock();

	/*
	 * If that didn't work, try the more aggressive approach.
	 */

	if (error) {
		if (waitok) {
			error = uvm_pglistalloc_contig_aggressive(num, low, high,
			    alignment, boundary, rlist);
		} else {
			uvm_pglistfree(rlist);
			uvm_kick_pdaemon();
		}
	}
	return error;
}

static int
uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo;
	long limit, candidate;
	struct vm_page *pg;
	bool second_pass;
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %d\n", num, psi);
#endif

	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi));
	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi));
	KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi));
	KASSERT(uvm_physseg_get_avail_end(psi) <= uvm_physseg_get_end(psi));

	low = atop(low);
	high = atop(high);

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return -1;

	todo = num;
	candidate = ulmax(low, uvm_physseg_get_avail_start(psi) +
	    uvm_physseg_get_start_hint(psi));
	limit = ulmin(high, uvm_physseg_get_avail_end(psi));
	pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
	second_pass = false;

again:
	for (;; candidate++, pg++) {
		if (candidate >= limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
				candidate = limit - 1;
				break;
			}
			second_pass = true;
			candidate = ulmax(low, uvm_physseg_get_avail_start(psi));
			limit = ulmin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
			goto again;
		}
#if defined(DEBUG)
		{
			paddr_t cidx = 0;
			const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx);
			KDASSERTMSG(bank == psi,
			    "uvm_physseg_find(%#lx) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG,
			     candidate, bank, psi);
			KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi),
			    "uvm_physseg_find(%#lx): %#"PRIxPADDR" != off %"PRIxPADDR,
			     candidate, cidx, (paddr_t)candidate - uvm_physseg_get_start(psi));
		}
#endif
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0) {
			break;
		}
	}

	/*
	 * The next time we need to search this segment,
	 * start just after the pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + 1 - uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) -
	    uvm_physseg_get_avail_start(psi),
	    "%#lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + 1,
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi),
	    uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}
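
/*
 * Illustrative sketch (not part of the original file, kept under "#if 0"
 * so it is never compiled): the resume-from-hint pattern shared by
 * uvm_pglistalloc_c_ps() and uvm_pglistalloc_s_ps() above, reduced to an
 * abstract index scan.  The search starts at start + hint (just after the
 * previous allocation), runs to the end of the segment, and then makes at
 * most one second pass that wraps to the start and stops where the first
 * pass began, so every index is examined at most once per call.  The
 * helper name and the "ok" callback are hypothetical.
 */
#if 0
static long
scan_from_hint(long start, long end, long hint, bool (*ok)(long))
{
	long candidate = start + hint;
	long limit = end;
	bool second_pass = false;

	for (;;) {
		if (candidate >= limit) {
			if (hint == 0 || second_pass)
				return -1;	/* wrapped around: nothing found */
			second_pass = true;
			candidate = start;
			limit = start + hint;
			continue;
		}
		if ((*ok)(candidate))
			return candidate;
		candidate++;
	}
}
#endif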

static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, error;
	uvm_physseg_t psi;
	int count = 0;

	/* Default to "lose". */
	error = ENOMEM;
	bool valid = false;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();
	count++;

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
		{
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			int done = uvm_pglistalloc_s_ps(psi, num, low, high,
			    rlist);
			if (done >= 0) {
				valid = true;
				num -= done;
			}
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}
	if (!valid) {
		uvm_pgfl_unlock();
		return EINVAL;
	}

out:
	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_pgfl_unlock();
	uvm_kick_pdaemon();

	if (error) {
		if (waitok) {
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	int num, res;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());
	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (boundary != 0 && boundary < size)
		return (EINVAL);
	num = atop(round_page(size));
	low = roundup2(low, alignment);

	TAILQ_INIT(rlist);

	/*
	 * Turn off the caching of free pages - we need everything to be on
	 * the global freelists.
	 */
	uvm_pgflcache_pause();

	if (nsegs < num || alignment != PAGE_SIZE || boundary != 0)
		res = uvm_pglistalloc_contig(num, low, high, alignment,
					     boundary, rlist, waitok);
	else
		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

	uvm_pgflcache_resume();

	return (res);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
	struct vm_page *pg;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	while ((pg = TAILQ_FIRST(list)) != NULL) {
		TAILQ_REMOVE(list, pg, pageq.queue);
		uvm_pagefree(pg);
		STAT_DECR(uvm_pglistalloc_npages);
	}
}

void
uvm_pglistalloc_init(void)
{

	mutex_init(&uvm_pglistalloc_contig_lock, MUTEX_DEFAULT, IPL_NONE);
}
    758