Home | History | Annotate | Line # | Download | only in uvm
uvm_pglist.c revision 1.86.2.1
      1  1.86.2.1   thorpej /*	$NetBSD: uvm_pglist.c,v 1.86.2.1 2021/04/03 22:29:03 thorpej Exp $	*/
      2      1.45    nonaka 
      3       1.1       mrg /*-
      4      1.78        ad  * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc.
      5       1.1       mrg  * All rights reserved.
      6      1.15       chs  *
      7       1.1       mrg  * This code is derived from software contributed to The NetBSD Foundation
      8       1.1       mrg  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9      1.78        ad  * NASA Ames Research Center, and by Andrew Doran.
     10       1.1       mrg  *
     11       1.1       mrg  * Redistribution and use in source and binary forms, with or without
     12       1.1       mrg  * modification, are permitted provided that the following conditions
     13       1.1       mrg  * are met:
     14       1.1       mrg  * 1. Redistributions of source code must retain the above copyright
     15       1.1       mrg  *    notice, this list of conditions and the following disclaimer.
     16      1.15       chs  * 2. Redistributions in binary form must reproduce the above copyright
     17       1.1       mrg  *    notice, this list of conditions and the following disclaimer in the
     18       1.1       mrg  *    documentation and/or other materials provided with the distribution.
     19      1.15       chs  *
     20       1.1       mrg  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21       1.1       mrg  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22       1.1       mrg  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23       1.1       mrg  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24       1.1       mrg  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25       1.1       mrg  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26       1.1       mrg  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27       1.1       mrg  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28       1.1       mrg  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29       1.1       mrg  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30       1.1       mrg  * POSSIBILITY OF SUCH DAMAGE.
     31       1.1       mrg  */
     32       1.1       mrg 
     33       1.1       mrg /*
     34       1.1       mrg  * uvm_pglist.c: pglist functions
     35       1.1       mrg  */
     36      1.19     lukem 
     37      1.19     lukem #include <sys/cdefs.h>
     38  1.86.2.1   thorpej __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.86.2.1 2021/04/03 22:29:03 thorpej Exp $");
     39       1.1       mrg 
     40       1.1       mrg #include <sys/param.h>
     41       1.1       mrg #include <sys/systm.h>
     42      1.81        ad #include <sys/cpu.h>
     43       1.1       mrg 
     44       1.1       mrg #include <uvm/uvm.h>
     45      1.36      yamt #include <uvm/uvm_pdpolicy.h>
     46      1.78        ad #include <uvm/uvm_pgflcache.h>
     47       1.1       mrg 
#ifdef VM_PAGE_ALLOC_MEMORY_STATS
/*
 * Optional allocation statistics: STAT_INCR/STAT_DECR maintain a counter
 * of pages handed out through this interface; STAT_DECR complains rather
 * than underflowing.
 */
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
/* Pages currently allocated via uvm_pglistalloc (stats builds only). */
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

/* Serializes callers of uvm_pglistalloc_contig_aggressive(). */
kmutex_t uvm_pglistalloc_contig_lock;
     64       1.1       mrg /*
     65       1.1       mrg  * uvm_pglistalloc: allocate a list of pages
     66       1.1       mrg  *
     67      1.27  drochner  * => allocated pages are placed onto an rlist.  rlist is
     68      1.27  drochner  *    initialized by uvm_pglistalloc.
     69       1.1       mrg  * => returns 0 on success or errno on failure
     70      1.27  drochner  * => implementation allocates a single segment if any constraints are
     71      1.27  drochner  *	imposed by call arguments.
     72       1.1       mrg  * => doesn't take into account clean non-busy pages on inactive list
     73       1.1       mrg  *	that could be used(?)
     74       1.1       mrg  * => params:
     75       1.1       mrg  *	size		the size of the allocation, rounded to page size.
     76       1.1       mrg  *	low		the low address of the allowed allocation range.
     77       1.1       mrg  *	high		the high address of the allowed allocation range.
     78       1.1       mrg  *	alignment	memory must be aligned to this power-of-two boundary.
     79      1.15       chs  *	boundary	no segment in the allocation may cross this
     80       1.1       mrg  *			power-of-two boundary (relative to zero).
     81       1.1       mrg  */
     82       1.1       mrg 
/*
 * uvm_pglist_add: detach one free page from the global free-page queues
 * and append it to the caller's result list.
 *
 * => pg must be on a free queue; caller holds the free-page lock
 *    (taken via uvm_pgfl_lock() in uvm_pglistalloc_contig).
 * => the page is reset to a clean, unowned state before being queued.
 */
static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;

	/* Locate the bucket holding this page's color queue. */
	pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
	pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)];

#ifdef UVMDEBUG
	/* Sanity check: the page really is on its color free queue. */
	struct vm_page *tp;
	LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	/* Unlink from the free queue and update free-page accounting. */
	LIST_REMOVE(pg, pageq.list);
	pgb->pgb_nfree--;
	CPU_COUNT(CPU_COUNT_FREEPAGES, -1);
	/* Reset state: clean, owned by neither a uvm_object nor an anon. */
	pg->flags = PG_CLEAN;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}
    110      1.20  drochner 
/*
 * uvm_pglistalloc_c_ps: try to carve "num" physically-contiguous free
 * pages out of a single physical segment, honoring the [low, high)
 * range, alignment, and boundary constraints.
 *
 * => called with the free-page lock held.
 * => on success returns num and appends the pages to rlist; returns 0
 *    if no suitable run exists in this segment.
 * => uses the segment's start hint to resume where the last successful
 *    allocation left off, with a second wrap-around pass if needed.
 */
static int
uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	signed int candidate, limit, candidateidx, end, idx, skip;
	int pagemask;
	bool second_pass;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	paddr_t cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %d\n", num, psi);
#endif

	/* Convert byte addresses/alignment to page frame numbers. */
	low = atop(low);
	high = atop(high);
	alignment = atop(alignment);

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) || low >= uvm_physseg_get_avail_end(psi))
		return 0;

	/*
	 * We start our search just after where the last allocation
	 * succeeded.
	 */
	candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi) +
		uvm_physseg_get_start_hint(psi)), alignment);
	limit = uimin(high, uvm_physseg_get_avail_end(psi));
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);
	skip = 0;
	second_pass = false;

	for (;;) {
		bool ok = true;
		signed int cnt;

		if (candidate + num > limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
				/*
				 * We've run past the allowable range.
				 */
				return 0; /* FAIL = 0 pages*/
			}
			/*
			 * We've wrapped around the end of this segment
			 * so restart at the beginning but now our limit
			 * is where we started.
			 */
			second_pass = true;
			candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi)), alignment);
			limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			skip = 0;
			continue;
		}
		if (boundary != 0 &&
		    ((candidate ^ (candidate + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary. Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			candidate = (candidate + num - 1) & pagemask;
			candidate = roundup2(candidate, alignment);
			skip = 0;
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (uvm_physseg_find(candidate, &cidx) != psi)
			panic("pgalloc contig: botch1");
		if (cidx != candidate - uvm_physseg_get_start(psi))
			panic("pgalloc contig: botch2");
		if (uvm_physseg_find(candidate + num - 1, &cidx) != psi)
			panic("pgalloc contig: botch3");
		if (cidx != candidate - uvm_physseg_get_start(psi) + num - 1)
			panic("pgalloc contig: botch4");
#endif
		candidateidx = candidate - uvm_physseg_get_start(psi);
		end = candidateidx + num;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
#ifdef PGALLOC_VERBOSE
		printf("%s: psi=%d candidate=%#x end=%#x skip=%#x, align=%#"PRIxPADDR,
		    __func__, psi, candidateidx, end, skip, alignment);
#endif
		/*
		 * We start at the end and work backwards since if we find a
		 * non-free page, it makes no sense to continue.
		 *
		 * But on the plus side we have "vetted" some number of free
		 * pages.  If this iteration fails, we may be able to skip
		 * testing most of those pages again in the next pass.
		 */
		for (idx = end - 1; idx >= candidateidx + skip; idx--) {
			if (VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, idx)) == 0) {
				ok = false;
				break;
			}

#ifdef DEBUG
			if (idx > candidateidx) {
				idxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx));
				lastidxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx - 1));
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}

		if (ok) {
			/* Every page in the candidate run is free. */
			while (skip-- > 0) {
				KDASSERT(VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, candidateidx + skip)));
			}
#ifdef PGALLOC_VERBOSE
			printf(": ok\n");
#endif
			break;
		}

#ifdef PGALLOC_VERBOSE
		printf(": non-free at %#x\n", idx - candidateidx);
#endif
		/*
		 * count the number of pages we can advance
		 * since we know they aren't all free.
		 */
		cnt = idx + 1 - candidateidx;
		/*
		 * now round up that to the needed alignment.
		 */
		cnt = roundup2(cnt, alignment);
		/*
		 * The number of pages we can skip checking
		 * (might be 0 if cnt > num).
		 */
		skip = uimax(num - cnt, 0);
		candidate += cnt;
	}

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	for (idx = candidateidx; idx < end; idx++)
		uvm_pglist_add(uvm_physseg_get_pg(psi, idx), rlist);

	/*
	 * the next time we need to search this segment, start after this
	 * chunk of pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + num -
	    uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
	    "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + num,
	    uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return num; /* number of pages allocated */
}
    296      1.22  drochner 
/*
 * uvm_pglistalloc_contig_aggressive: fallback contiguous allocator.
 *
 * Allocates pages one at a time with uvm_pagealloc(), tagging each with
 * PG_PGLCA, until the set of tagged pages happens to contain a run that
 * satisfies the range/alignment/boundary constraints.  May sleep in
 * uvm_wait(), so it is only used for waitok requests (see
 * uvm_pglistalloc_contig).
 *
 * => returns 0 on success with the pages moved onto rlist, else ENOMEM;
 *    all pages not handed to the caller are freed before returning.
 * => uvm_pglistalloc_contig_lock serializes concurrent callers.
 */
static int
uvm_pglistalloc_contig_aggressive(int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	struct vm_page *pg;
	struct pglist tmp;
	paddr_t pa, off, spa, amask, bmask, rlo, rhi;
	uvm_physseg_t upm;
	int error, i, run, acnt;

	/*
	 * Allocate pages the normal way and for each new page, check if
	 * the page completes a range satisfying the request.
	 * The pagedaemon will evict pages as we go and we are very likely
	 * to get compatible pages eventually.
	 */

	error = ENOMEM;
	TAILQ_INIT(&tmp);
	acnt = atop(alignment);		/* pages per alignment unit */
	amask = ~(alignment - 1);
	bmask = ~(boundary - 1);
	KASSERT(bmask <= amask);
	mutex_enter(&uvm_pglistalloc_contig_lock);
	while (uvm_reclaimable()) {
		pg = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pg == NULL) {
			/* Out of pages right now; wait for the pagedaemon. */
			uvm_wait("pglac2");
			continue;
		}
		/* Tag the page as held by this allocation attempt. */
		pg->flags |= PG_PGLCA;
		TAILQ_INSERT_HEAD(&tmp, pg, pageq.queue);

		pa = VM_PAGE_TO_PHYS(pg);
		if (pa < low || pa >= high) {
			/* Outside the requested range; keep it tagged anyway. */
			continue;
		}

		upm = uvm_physseg_find(atop(pa), &off);
		KASSERT(uvm_physseg_valid_p(upm));

		/* Aligned start of the candidate run containing pa. */
		spa = pa & amask;

		/*
		 * Look backward for at most num - 1 pages, back to
		 * the highest of:
		 *  - the first page in the physseg
		 *  - the specified low address
		 *  - num-1 pages before the one we just allocated
		 *  - the start of the boundary range containing pa
		 * all rounded up to alignment.
		 */

		rlo = roundup2(ptoa(uvm_physseg_get_avail_start(upm)), alignment);
		rlo = MAX(rlo, roundup2(low, alignment));
		rlo = MAX(rlo, roundup2(pa - ptoa(num - 1), alignment));
		if (boundary) {
			rlo = MAX(rlo, spa & bmask);
		}

		/*
		 * Look forward as far as the lowest of:
		 *  - the last page of the physseg
		 *  - the specified high address
		 *  - the boundary after pa
		 */

		rhi = ptoa(uvm_physseg_get_avail_end(upm));
		rhi = MIN(rhi, high);
		if (boundary) {
			rhi = MIN(rhi, rounddown2(pa, boundary) + boundary);
		}

		/*
		 * Make sure our range to consider is big enough.
		 */

		if (rhi - rlo < ptoa(num)) {
			continue;
		}

		run = 0;
		while (spa > rlo) {

			/*
			 * Examine pages before spa in groups of acnt.
			 * If all the pages in a group are marked then add
			 * these pages to the run.
			 */

			for (i = 0; i < acnt; i++) {
				pg = PHYS_TO_VM_PAGE(spa - alignment + ptoa(i));
				if ((pg->flags & PG_PGLCA) == 0) {
					break;
				}
			}
			if (i < acnt) {
				break;
			}
			spa -= alignment;
			run += acnt;
		}

		/*
		 * Look forward for any remaining pages.
		 */

		if (spa + ptoa(num) > rhi) {
			/* Not enough room above spa for the full run. */
			continue;
		}
		for (; run < num; run++) {
			pg = PHYS_TO_VM_PAGE(spa + ptoa(run));
			if ((pg->flags & PG_PGLCA) == 0) {
				break;
			}
		}
		if (run < num) {
			continue;
		}

		/*
		 * We found a match.  Move these pages from the tmp list to
		 * the caller's list.
		 */

		for (i = 0; i < num; i++) {
			pg = PHYS_TO_VM_PAGE(spa + ptoa(i));
			TAILQ_REMOVE(&tmp, pg, pageq.queue);
			pg->flags &= ~PG_PGLCA;
			TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
			STAT_INCR(uvm_pglistalloc_npages);
		}

		error = 0;
		break;
	}

	/*
	 * Free all the pages that we didn't need.
	 */

	while (!TAILQ_EMPTY(&tmp)) {
		pg = TAILQ_FIRST(&tmp);
		TAILQ_REMOVE(&tmp, pg, pageq.queue);
		pg->flags &= ~PG_PGLCA;
		uvm_pagefree(pg);
	}
	mutex_exit(&uvm_pglistalloc_contig_lock);
	return error;
}
    447      1.86       chs 
    448      1.86       chs static int
    449      1.33   thorpej uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    450      1.86       chs     paddr_t boundary, struct pglist *rlist, int waitok)
    451      1.22  drochner {
    452      1.68    cherry 	int fl;
    453      1.38        ad 	int error;
    454      1.86       chs 	uvm_physseg_t psi;
    455      1.22  drochner 
    456      1.22  drochner 	/* Default to "lose". */
    457      1.22  drochner 	error = ENOMEM;
    458      1.22  drochner 
    459      1.22  drochner 	/*
    460      1.22  drochner 	 * Block all memory allocation and lock the free list.
    461      1.22  drochner 	 */
    462      1.78        ad 	uvm_pgfl_lock();
    463      1.22  drochner 
    464      1.22  drochner 	/* Are there even any free pages? */
    465      1.83        ad 	if (uvm_availmem(false) <=
    466      1.79        ad 	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
    467      1.22  drochner 		goto out;
    468      1.22  drochner 
    469      1.22  drochner 	for (fl = 0; fl < VM_NFREELIST; fl++) {
    470      1.22  drochner #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
    471      1.68    cherry 		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
    472      1.22  drochner #else
    473      1.68    cherry 		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
    474      1.22  drochner #endif
    475      1.22  drochner 		{
    476      1.68    cherry 			if (uvm_physseg_get_free_list(psi) != fl)
    477      1.22  drochner 				continue;
    478      1.22  drochner 
    479      1.68    cherry 			num -= uvm_pglistalloc_c_ps(psi, num, low, high,
    480      1.24  drochner 						    alignment, boundary, rlist);
    481      1.24  drochner 			if (num == 0) {
    482      1.24  drochner #ifdef PGALLOC_VERBOSE
    483      1.44   reinoud 				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
    484      1.44   reinoud 				       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
    485      1.44   reinoud 				       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
    486      1.22  drochner #endif
    487      1.22  drochner 				error = 0;
    488      1.22  drochner 				goto out;
    489      1.22  drochner 			}
    490      1.22  drochner 		}
    491      1.22  drochner 	}
    492      1.20  drochner 
    493      1.20  drochner out:
    494      1.86       chs 	uvm_pgfl_unlock();
    495      1.86       chs 
    496      1.20  drochner 	/*
    497      1.86       chs 	 * If that didn't work, try the more aggressive approach.
    498      1.20  drochner 	 */
    499  1.86.2.1   thorpej 
    500      1.86       chs 	if (error) {
    501      1.86       chs 		if (waitok) {
    502      1.86       chs 			error = uvm_pglistalloc_contig_aggressive(num, low, high,
    503      1.86       chs 			    alignment, boundary, rlist);
    504      1.86       chs 		} else {
    505      1.86       chs 			uvm_pglistfree(rlist);
    506      1.86       chs 			uvm_kick_pdaemon();
    507      1.86       chs 		}
    508      1.86       chs 	}
    509      1.86       chs 	return error;
    510      1.20  drochner }
    511      1.20  drochner 
/*
 * uvm_pglistalloc_s_ps: allocate up to num free pages (not necessarily
 * contiguous) from a single physical segment, restricted to [low, high).
 *
 * => called with the free-page lock held.
 * => returns the number of pages allocated; pages are appended to rlist.
 * => like uvm_pglistalloc_c_ps, resumes from the segment's start hint
 *    and makes a second wrap-around pass if the first pass comes up short.
 */
static int
uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo, limit, candidate;
	struct vm_page *pg;
	bool second_pass;
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %d\n", num, psi);
#endif

	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi));
	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi));
	KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi));
	KASSERT(uvm_physseg_get_avail_end(psi) <= uvm_physseg_get_end(psi));

	/* Convert byte addresses to page frame numbers. */
	low = atop(low);
	high = atop(high);
	todo = num;
	/* Resume the search where the last allocation left off. */
	candidate = uimax(low, uvm_physseg_get_avail_start(psi) +
	    uvm_physseg_get_start_hint(psi));
	limit = uimin(high, uvm_physseg_get_avail_end(psi));
	pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
	second_pass = false;

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return 0;

again:
	for (;; candidate++, pg++) {
		if (candidate >= limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) {
				/* Both passes exhausted; stop here. */
				candidate = limit - 1;
				break;
			}
			/*
			 * Wrap around to the segment start; the new limit
			 * is where the first pass began.
			 */
			second_pass = true;
			candidate = uimax(low, uvm_physseg_get_avail_start(psi));
			limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
			goto again;
		}
#if defined(DEBUG)
		{
			paddr_t cidx = 0;
			const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx);
			KDASSERTMSG(bank == psi,
			    "uvm_physseg_find(%#x) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG,
			     candidate, bank, psi);
			KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi),
			    "uvm_physseg_find(%#x): %#"PRIxPADDR" != off %"PRIxPADDR,
			     candidate, cidx, candidate - uvm_physseg_get_start(psi));
		}
#endif
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0) {
			break;
		}
	}

	/*
	 * The next time we need to search this segment,
	 * start just after the pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + 1 - uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) -
	    uvm_physseg_get_avail_start(psi),
	    "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + 1,
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi),
	    uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}
    599      1.22  drochner 
/*
 * uvm_pglistalloc_simple: allocate "num" pages with no contiguity or
 * alignment constraint, each with physical address in [low, high),
 * appending them to "rlist".
 *
 * => walks every physical segment on every free list (highest segments
 *    first when VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) and harvests
 *    in-range free pages via uvm_pglistalloc_s_ps().
 * => if "waitok" is set, sleeps in uvm_wait() and retries until the
 *    request is fully satisfied; otherwise any partial result is freed
 *    and ENOMEM returned.
 * => called from uvm_pglistalloc(), which has already paused the
 *    per-CPU free-page caches so all free pages are on the global lists.
 * => returns 0 on success, ENOMEM on failure (when !waitok).
 */
static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, error;
	uvm_physseg_t psi;
	int count = 0;	/* retry-pass counter; only incremented here —
			 * apparently a debugging aid */

	/* Default to "lose". */
	error = ENOMEM;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();
	count++;

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi))
#endif
		{
			/* Only take pages from segments on this free list. */
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			/* s_ps returns how many in-range pages it grabbed. */
			num -= uvm_pglistalloc_s_ps(psi, num, low, high, rlist);
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_pgfl_unlock();
	uvm_kick_pdaemon();

	if (error) {
		if (waitok) {
			/* Sleep until the pagedaemon frees something. */
			uvm_wait("pglalloc");
			goto again;
		} else
			/* Non-blocking caller: give back any partial set. */
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		       (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}
    666      1.20  drochner 
    667      1.20  drochner int
    668      1.33   thorpej uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    669      1.33   thorpej     paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
    670      1.20  drochner {
    671      1.24  drochner 	int num, res;
    672      1.20  drochner 
    673      1.81        ad 	KASSERT(!cpu_intr_p());
    674      1.81        ad 	KASSERT(!cpu_softintr_p());
    675      1.20  drochner 	KASSERT((alignment & (alignment - 1)) == 0);
    676      1.20  drochner 	KASSERT((boundary & (boundary - 1)) == 0);
    677      1.20  drochner 
    678      1.20  drochner 	/*
    679      1.20  drochner 	 * Our allocations are always page granularity, so our alignment
    680      1.20  drochner 	 * must be, too.
    681      1.20  drochner 	 */
    682      1.20  drochner 	if (alignment < PAGE_SIZE)
    683      1.20  drochner 		alignment = PAGE_SIZE;
    684      1.24  drochner 	if (boundary != 0 && boundary < size)
    685      1.24  drochner 		return (EINVAL);
    686      1.24  drochner 	num = atop(round_page(size));
    687      1.52      matt 	low = roundup2(low, alignment);
    688      1.21  drochner 
    689      1.21  drochner 	TAILQ_INIT(rlist);
    690      1.20  drochner 
    691      1.78        ad 	/*
    692      1.78        ad 	 * Turn off the caching of free pages - we need everything to be on
    693      1.78        ad 	 * the global freelists.
    694      1.78        ad 	 */
    695      1.78        ad 	uvm_pgflcache_pause();
    696      1.78        ad 
    697      1.86       chs 	if (nsegs < num || alignment != PAGE_SIZE || boundary != 0)
    698      1.24  drochner 		res = uvm_pglistalloc_contig(num, low, high, alignment,
    699      1.86       chs 					     boundary, rlist, waitok);
    700      1.20  drochner 	else
    701      1.24  drochner 		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);
    702      1.20  drochner 
    703      1.78        ad 	uvm_pgflcache_resume();
    704      1.78        ad 
    705      1.20  drochner 	return (res);
    706       1.1       mrg }
    707       1.1       mrg 
    708       1.1       mrg /*
    709       1.1       mrg  * uvm_pglistfree: free a list of pages
    710       1.1       mrg  *
    711       1.1       mrg  * => pages should already be unmapped
    712       1.1       mrg  */
    713       1.1       mrg 
    714       1.3       mrg void
    715      1.33   thorpej uvm_pglistfree(struct pglist *list)
    716       1.1       mrg {
    717      1.18       chs 	struct vm_page *pg;
    718       1.1       mrg 
    719      1.81        ad 	KASSERT(!cpu_intr_p());
    720      1.81        ad 	KASSERT(!cpu_softintr_p());
    721      1.81        ad 
    722      1.18       chs 	while ((pg = TAILQ_FIRST(list)) != NULL) {
    723      1.42        ad 		TAILQ_REMOVE(list, pg, pageq.queue);
    724      1.82        ad 		uvm_pagefree(pg);
    725       1.3       mrg 		STAT_DECR(uvm_pglistalloc_npages);
    726       1.3       mrg 	}
    727       1.1       mrg }
    728      1.86       chs 
/*
 * uvm_pglistalloc_init: one-time module initialization.
 *
 * Sets up uvm_pglistalloc_contig_lock as an adaptive mutex at IPL_NONE.
 * NOTE(review): by its name this lock serializes the contiguous
 * allocator (uvm_pglistalloc_contig); its use sites are outside this
 * view — confirm there.
 */
void
uvm_pglistalloc_init(void)
{

	mutex_init(&uvm_pglistalloc_contig_lock, MUTEX_DEFAULT, IPL_NONE);
}
    735