uvm_pglist.c revision 1.39 1 /* $NetBSD: uvm_pglist.c,v 1.39 2008/02/27 14:24:24 ad Exp $ */
2
3 /*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * uvm_pglist.c: pglist functions
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.39 2008/02/27 14:24:24 ad Exp $");
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/malloc.h>
50 #include <sys/proc.h>
51
52 #include <uvm/uvm.h>
53 #include <uvm/uvm_pdpolicy.h>
54
/*
 * Optional allocation accounting: when VM_PAGE_ALLOC_MEMORY_STATS is
 * defined, uvm_pglistalloc_npages tracks how many pages are currently
 * out on pglists.  STAT_DECR complains (rather than wrapping) if the
 * counter would go below zero, which would indicate an unbalanced
 * alloc/free.  Without the option, both macros compile to nothing.
 */
#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif
68
69 /*
70 * uvm_pglistalloc: allocate a list of pages
71 *
72 * => allocated pages are placed onto an rlist. rlist is
73 * initialized by uvm_pglistalloc.
74 * => returns 0 on success or errno on failure
75 * => implementation allocates a single segment if any constraints are
76 * imposed by call arguments.
77 * => doesn't take into account clean non-busy pages on inactive list
78 * that could be used(?)
79 * => params:
80 * size the size of the allocation, rounded to page size.
81 * low the low address of the allowed allocation range.
82 * high the high address of the allowed allocation range.
83 * alignment memory must be aligned to this power-of-two boundary.
84 * boundary no segment in the allocation may cross this
85 * power-of-two boundary (relative to zero).
86 */
87
/*
 * uvm_pglist_add: move one free page onto the caller's rlist.
 *
 * Removes pg from the free queue it currently sits on (identified by
 * its freelist, color bucket, and zero'd/unknown sub-queue), updates
 * the global free/zero counters, resets the page's identity, and
 * appends it to rlist.
 *
 * => caller must hold uvm_fpageqlock (asserted).
 * => pg must be a free page (DEBUG kernels verify it is actually
 *    present on the computed free queue).
 */
static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	int free_list, color, pgflidx;
#ifdef DEBUG
	struct vm_page *tp;
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	/* The PGFL_ZEROS/PGFL_UNKNOWN choice below assumes two queues. */
#if PGFL_NQUEUES != 2
#error uvm_pglistalloc needs to be updated
#endif

	free_list = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
#ifdef DEBUG
	/* Sanity: pg must really be on the queue we are about to edit. */
	for (tp = TAILQ_FIRST(&uvm.page_free[
	    free_list].pgfl_buckets[color].pgfl_queues[pgflidx]);
	     tp != NULL;
	     tp = TAILQ_NEXT(tp, pageq)) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[
	    color].pgfl_queues[pgflidx], pg, pageq);
	uvmexp.free--;
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;
	/* Give the page a clean identity before handing it out. */
	pg->flags = PG_CLEAN;
	pg->pqflags = 0;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq);
	STAT_INCR(uvm_pglistalloc_npages);
}
128
/*
 * uvm_pglistalloc_c_ps: try to carve a contiguous run of num free pages
 * out of the single physical segment ps, honoring the low/high range,
 * alignment, and boundary constraints; on success the pages are moved
 * onto rlist.
 *
 * => caller must hold uvm_fpageqlock (asserted).
 * => returns num if the whole run was allocated, 0 otherwise (this
 *    function is all-or-nothing; see uvm_pglistalloc_s_ps for the
 *    unconstrained, partial-result variant).
 */
static int
uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	/* try/limit are page frame numbers, tryidx/end/idx index ps->pgs */
	int try, limit, tryidx, end, idx;
	struct vm_page *pgs;
	int pagemask;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	int cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %ld\n", num,
	    (long)(ps - vm_physmem));
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	/*
	 * Start at the first aligned frame inside both the caller's
	 * range and the segment; never scan past the end of either.
	 */
	try = roundup(max(atop(low), ps->avail_start), atop(alignment));
	limit = min(atop(high), ps->avail_end);
	/* PFN bits that identify which boundary-sized block a frame is in */
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);

	for (;;) {
		if (try + num > limit) {
			/*
			 * We've run past the allowable range.
			 */
			return (0); /* FAIL */
		}
		if (boundary != 0 &&
		    ((try ^ (try + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary. Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			try = (try + num - 1) & pagemask;
			try = roundup(try, atop(alignment));
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch1");
		if (cidx != try - ps->start)
			panic("pgalloc contig: botch2");
		if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch3");
		if (cidx != try - ps->start + num - 1)
			panic("pgalloc contig: botch4");
#endif
		tryidx = try - ps->start;
		end = tryidx + num;
		pgs = ps->pgs;

		/*
		 * Found a suitable starting page. See if the range is free.
		 */
		for (idx = tryidx; idx < end; idx++) {
			if (VM_PAGE_IS_FREE(&pgs[idx]) == 0)
				break;

#ifdef DEBUG
			/* Verify the run really is physically contiguous. */
			idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
			if (idx > tryidx) {
				lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}
		if (idx == end)
			break;

		/* A page in the run was busy: try the next aligned start. */
		try += atop(alignment);
	}

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	idx = tryidx;
	while (idx < end)
		uvm_pglist_add(&pgs[idx++], rlist);

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return (num); /* number of pages allocated */
}
232
233 static int
234 uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
235 paddr_t boundary, struct pglist *rlist)
236 {
237 int fl, psi;
238 struct vm_physseg *ps;
239 int error;
240
241 /* Default to "lose". */
242 error = ENOMEM;
243
244 /*
245 * Block all memory allocation and lock the free list.
246 */
247 mutex_spin_enter(&uvm_fpageqlock);
248
249 /* Are there even any free pages? */
250 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
251 goto out;
252
253 for (fl = 0; fl < VM_NFREELIST; fl++) {
254 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
255 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
256 #else
257 for (psi = 0 ; psi < vm_nphysseg ; psi++)
258 #endif
259 {
260 ps = &vm_physmem[psi];
261
262 if (ps->free_list != fl)
263 continue;
264
265 num -= uvm_pglistalloc_c_ps(ps, num, low, high,
266 alignment, boundary, rlist);
267 if (num == 0) {
268 #ifdef PGALLOC_VERBOSE
269 printf("pgalloc: %lx-%lx\n",
270 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
271 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist)));
272 #endif
273 error = 0;
274 goto out;
275 }
276 }
277 }
278
279 out:
280 /*
281 * check to see if we need to generate some free pages waking
282 * the pagedaemon.
283 */
284
285 uvm_kick_pdaemon();
286 mutex_spin_exit(&uvm_fpageqlock);
287 return (error);
288 }
289
290 static int
291 uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
292 struct pglist *rlist)
293 {
294 int todo, limit, try;
295 struct vm_page *pg;
296 #ifdef DEBUG
297 int cidx = 0; /* XXX: GCC */
298 #endif
299 #ifdef PGALLOC_VERBOSE
300 printf("pgalloc: simple %d pgs from psi %ld\n", num,
301 (long)(ps - vm_physmem));
302 #endif
303
304 KASSERT(mutex_owned(&uvm_fpageqlock));
305
306 todo = num;
307 limit = min(atop(high), ps->avail_end);
308
309 for (try = max(atop(low), ps->avail_start);
310 try < limit; try ++) {
311 #ifdef DEBUG
312 if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
313 panic("pgalloc simple: botch1");
314 if (cidx != (try - ps->start))
315 panic("pgalloc simple: botch2");
316 #endif
317 pg = &ps->pgs[try - ps->start];
318 if (VM_PAGE_IS_FREE(pg) == 0)
319 continue;
320
321 uvm_pglist_add(pg, rlist);
322 if (--todo == 0)
323 break;
324 }
325
326 #ifdef PGALLOC_VERBOSE
327 printf("got %d pgs\n", num - todo);
328 #endif
329 return (num - todo); /* number of pages allocated */
330 }
331
/*
 * uvm_pglistalloc_simple: unconstrained back end for uvm_pglistalloc.
 *
 * Collects num individual (not necessarily contiguous) free pages in
 * the [low, high) physical range, scanning every segment on every
 * freelist via uvm_pglistalloc_s_ps.
 *
 * => returns 0 on success (pages on rlist) or ENOMEM.
 * => if waitok, sleeps in uvm_wait() and retries indefinitely until
 *    the request is satisfied; pages gathered before the sleep stay
 *    on rlist and only the shortfall is requested on retry.
 * => if !waitok, a partial allocation is freed before returning.
 */
static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, psi, error;
	struct vm_physseg *ps;

	/* Default to "lose". */
	error = ENOMEM;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		/* segment walk order depends on the physseg strategy */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist);
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);

	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
#ifdef DEBUG
			printf("pglistalloc waiting\n");
#endif
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %lx..%lx\n",
		    VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		    VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}
401
402 int
403 uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
404 paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
405 {
406 int num, res;
407
408 KASSERT((alignment & (alignment - 1)) == 0);
409 KASSERT((boundary & (boundary - 1)) == 0);
410
411 /*
412 * Our allocations are always page granularity, so our alignment
413 * must be, too.
414 */
415 if (alignment < PAGE_SIZE)
416 alignment = PAGE_SIZE;
417 if (boundary != 0 && boundary < size)
418 return (EINVAL);
419 num = atop(round_page(size));
420 low = roundup(low, alignment);
421
422 TAILQ_INIT(rlist);
423
424 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
425 (boundary != 0))
426 res = uvm_pglistalloc_contig(num, low, high, alignment,
427 boundary, rlist);
428 else
429 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);
430
431 return (res);
432 }
433
/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 * => each page is returned to the free queue matching its freelist,
 *    color bucket and zero'd/unknown state, and the global free/zero
 *    counters are updated to match.
 */

void
uvm_pglistfree(struct pglist *list)
{
	struct vm_page *pg;

	/*
	 * Lock the free list and free each page.
	 */

	mutex_spin_enter(&uvm_fpageqlock);
	while ((pg = TAILQ_FIRST(list)) != NULL) {
		bool iszero;

		/* pages on an rlist must not be on any paging queue */
		KASSERT(!uvmpdpol_pageisqueued_p(pg));
		TAILQ_REMOVE(list, pg, pageq);
		iszero = (pg->flags & PG_ZERO);
		pg->pqflags = PQ_FREE;
#ifdef DEBUG
		/* poison identity fields to catch use-after-free */
		pg->uobject = (void *)0xdeadbeef;
		pg->offset = 0xdeadbeef;
		pg->uanon = (void *)0xdeadbeef;
#endif /* DEBUG */
#ifdef DEBUG
		/* a PG_ZERO page must really contain zeros when freed */
		if (iszero)
			uvm_pagezerocheck(pg);
#endif /* DEBUG */
		TAILQ_INSERT_HEAD(&uvm.page_free[uvm_page_lookup_freelist(pg)].
		    pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
		    pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq);
		uvmexp.free++;
		if (iszero)
			uvmexp.zeropages++;
		/* restart idle-loop zeroing while below the target */
		if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
			uvm.page_idle_zero = vm_page_zero_enable;
		STAT_DECR(uvm_pglistalloc_npages);
	}
	mutex_spin_exit(&uvm_fpageqlock);
}
478