/*	$NetBSD: uvm_pglist.c,v 1.22 2002/06/18 15:49:48 drochner Exp $	*/
2
3 /*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * uvm_pglist.c: pglist functions
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.22 2002/06/18 15:49:48 drochner Exp $");
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/malloc.h>
50 #include <sys/proc.h>
51
52 #include <uvm/uvm.h>
53
54 #ifdef VM_PAGE_ALLOC_MEMORY_STATS
55 #define STAT_INCR(v) (v)++
56 #define STAT_DECR(v) do { \
57 if ((v) == 0) \
58 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
59 else \
60 (v)--; \
61 } while (0)
62 u_long uvm_pglistalloc_npages;
63 #else
64 #define STAT_INCR(v)
65 #define STAT_DECR(v)
66 #endif
67
68 /*
69 * uvm_pglistalloc: allocate a list of pages
70 *
71 * => allocated pages are placed at the tail of rlist. rlist is
72 * assumed to be properly initialized by caller.
73 * => returns 0 on success or errno on failure
 * => XXX: implementation allocates only a single segment; might be
 *	able to take better advantage of vm_physseg[].
76 * => doesn't take into account clean non-busy pages on inactive list
77 * that could be used(?)
78 * => params:
79 * size the size of the allocation, rounded to page size.
80 * low the low address of the allowed allocation range.
81 * high the high address of the allowed allocation range.
82 * alignment memory must be aligned to this power-of-two boundary.
83 * boundary no segment in the allocation may cross this
84 * power-of-two boundary (relative to zero).
85 */
86
87 static void uvm_pglist_add(struct vm_page *, struct pglist *);
88 static int uvm_pglistalloc_c_ps(int, psize_t, paddr_t, paddr_t,
89 paddr_t, paddr_t, struct pglist *);
90 static int uvm_pglistalloc_contig(psize_t, paddr_t, paddr_t, paddr_t, paddr_t,
91 struct pglist *);
92 static void uvm_pglistalloc_s_ps(int, paddr_t, paddr_t,
93 struct pglist *, int *);
94 static int uvm_pglistalloc_simple(psize_t, paddr_t, paddr_t,
95 struct pglist *, int);
96
97 static void
98 uvm_pglist_add(pg, rlist)
99 struct vm_page *pg;
100 struct pglist *rlist;
101 {
102 int free_list, color, pgflidx;
103 #ifdef DEBUG
104 struct vm_page *tp;
105 #endif
106
107 #if PGFL_NQUEUES != 2
108 #error uvm_pglistalloc needs to be updated
109 #endif
110
111 free_list = uvm_page_lookup_freelist(pg);
112 color = VM_PGCOLOR_BUCKET(pg);
113 pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
114 #ifdef DEBUG
115 for (tp = TAILQ_FIRST(&uvm.page_free[
116 free_list].pgfl_buckets[color].pgfl_queues[pgflidx]);
117 tp != NULL;
118 tp = TAILQ_NEXT(tp, pageq)) {
119 if (tp == pg)
120 break;
121 }
122 if (tp == NULL)
123 panic("uvm_pglistalloc: page not on freelist");
124 #endif
125 TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[
126 color].pgfl_queues[pgflidx], pg, pageq);
127 uvmexp.free--;
128 if (pg->flags & PG_ZERO)
129 uvmexp.zeropages--;
130 pg->flags = PG_CLEAN;
131 pg->pqflags = 0;
132 pg->uobject = NULL;
133 pg->uanon = NULL;
134 TAILQ_INSERT_TAIL(rlist, pg, pageq);
135 STAT_INCR(uvm_pglistalloc_npages);
136 }
137
/*
 * uvm_pglistalloc_c_ps: scan physical segment "psi" for a contiguous
 * run of "size" bytes of free pages lying within [low, high], starting
 * on an "alignment" boundary and not crossing a "boundary" boundary.
 * Matching pages are detached and appended to rlist.
 *
 * => caller must hold the free page queue lock.
 * => returns 1 on success, 0 if no suitable run exists in this segment.
 */
static int
uvm_pglistalloc_c_ps(psi, size, low, high, alignment, boundary, rlist)
	int psi;
	psize_t size;
	paddr_t low, high, alignment, boundary;
	struct pglist *rlist;
{
	int try, limit, tryidx, end, idx;
	struct vm_page *pgs;
	paddr_t idxpa, lastidxpa;
	u_long pagemask;
#ifdef DEBUG
	int cidx;
#endif

	/* Last usable page frame number (exclusive) in this segment. */
	limit = min(atop(high), vm_physmem[psi].avail_end);
	/* Addresses within the same boundary chunk agree in these bits. */
	pagemask = ~(boundary - 1);

	/*
	 * Walk candidate start frames, stepping by the alignment so
	 * every candidate start is properly aligned.
	 */
	for (try = roundup(max(atop(low), vm_physmem[psi].avail_start),
	atop(alignment));; try += atop(alignment)) {
		/*
		 * NOTE(review): this rejects a run ending exactly at
		 * "limit" (try + atop(size) == limit), which looks like a
		 * valid allocation -- possible off-by-one, but the DEBUG
		 * check below probes frame try + atop(size) and relies on
		 * it being in-segment; confirm both together before
		 * changing.
		 */
		if (try + atop(size) >= limit) {

			/*
			 * We've run past the allowable range.
			 */

			return (0); /* FAIL */
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (vm_physseg_find(try, &cidx) != psi)
			panic("pgalloc contig: botch1");
		if (cidx != try - vm_physmem[psi].start)
			panic("pgalloc contig: botch2");
		if (vm_physseg_find(try + atop(size), &cidx) != psi)
			panic("pgalloc contig: botch3");
		if (cidx != try - vm_physmem[psi].start + atop(size))
			panic("pgalloc contig: botch4");
#endif
		/* Index of the candidate run within this segment's pgs[]. */
		tryidx = try - vm_physmem[psi].start;
		end = tryidx + (size / PAGE_SIZE);
		pgs = vm_physmem[psi].pgs;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */

		for (idx = tryidx; idx < end; idx++) {
			if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
				break;
			}
			idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
			if (idx > tryidx) {
				lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
				if ((lastidxpa + PAGE_SIZE) != idxpa) {

					/*
					 * Region not contiguous.
					 */

					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & pagemask) != 0) {

					/*
					 * Region crosses boundary.
					 */

					break;
				}
			}
		}
		/* Inner loop ran to completion => whole run is usable. */
		if (idx == end) {
			break;
		}
	}

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	idx = tryidx;
	while (idx < end) {
		uvm_pglist_add(&pgs[idx++], rlist);
	}
	return (1);
}
228
/*
 * uvm_pglistalloc_contig: contiguous allocation path.  Tries each free
 * list in preference order and, within it, each physical segment, until
 * one segment can supply the entire run.
 *
 * => never waits; always returns after one pass over the segments.
 * => returns 0 on success, EINVAL for an impossible request, or ENOMEM.
 */
static int
uvm_pglistalloc_contig(size, low, high, alignment, boundary, rlist)
	psize_t size;
	paddr_t low, high, alignment, boundary;
	struct pglist *rlist;
{
	int fl, psi;
	int s, error;

	/* A boundary smaller than the size can never be satisfied. */
	if (boundary != 0 && boundary < size)
		return (EINVAL);

	/* Default to "lose". */
	error = ENOMEM;

	/*
	 * Block all memory allocation and lock the free list.
	 */

	s = uvm_lock_fpageq();

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			/* Only consider segments on the current free list. */
			if (vm_physmem[psi].free_list != fl)
				continue;

			if (uvm_pglistalloc_c_ps(psi, size, low, high,
			    alignment, boundary, rlist)) {
#if 0
				printf("pgalloc: %lx-%lx\n",
				    TAILQ_FIRST(rlist)->phys_addr,
				    TAILQ_LAST(rlist, pglist)->phys_addr);
#endif
				error = 0;
				goto out;
			}
		}
	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	UVM_KICK_PDAEMON();
	uvm_unlock_fpageq(s);
	return (error);
}
287
288 static void
289 uvm_pglistalloc_s_ps(psi, low, high, rlist, todo)
290 int psi;
291 paddr_t low, high;
292 struct pglist *rlist;
293 int *todo;
294 {
295 int limit, try;
296 struct vm_page *pg;
297 #ifdef DEBUG
298 int cidx;
299 #endif
300
301 limit = min(atop(high), vm_physmem[psi].avail_end);
302
303 for (try = max(atop(low), vm_physmem[psi].avail_start);
304 try < limit; try ++) {
305 #ifdef DEBUG
306 if (vm_physseg_find(try, &cidx) != psi)
307 panic("pgalloc simple: botch1");
308 if (cidx != (try - vm_physmem[psi].start))
309 panic("pgalloc simple: botch2");
310 #endif
311 pg = &vm_physmem[psi].pgs[try - vm_physmem[psi].start];
312 if (VM_PAGE_IS_FREE(pg) == 0)
313 continue;
314
315 uvm_pglist_add(pg, rlist);
316 if (--(*todo) == 0)
317 break;
318 }
319 }
320
/*
 * uvm_pglistalloc_simple: non-contiguous allocation path.  Collects
 * any free pages in [low, high] from each free list / segment until
 * size bytes worth of pages are on rlist.
 *
 * => if waitok, sleeps via uvm_wait() and retries until it succeeds;
 *    otherwise any partial result is freed and ENOMEM returned.
 * => returns 0 on success, ENOMEM on (non-waiting) failure.
 */
static int
uvm_pglistalloc_simple(size, low, high, rlist, waitok)
	psize_t size;
	paddr_t low, high;
	struct pglist *rlist;
	int waitok;
{
	int fl, psi, s, todo, error;

	/* Default to "lose". */
	error = ENOMEM;

	/* Number of pages still needed. */
	todo = size / PAGE_SIZE;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */

	s = uvm_lock_fpageq();

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			/* Only consider segments on the current free list. */
			if (vm_physmem[psi].free_list != fl)
				continue;

			uvm_pglistalloc_s_ps(psi, low, high, rlist, &todo);
			if (todo == 0) {
				error = 0;
				goto out;
			}
		}

	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	UVM_KICK_PDAEMON();
	uvm_unlock_fpageq(s);
	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
#ifdef DEBUG
			printf("pglistalloc waiting\n");
#endif
			/*
			 * Sleep until the pagedaemon frees something,
			 * then retry with whatever we already collected
			 * still on rlist.
			 */
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#if 0
	if (!error)
		printf("pgalloc: %lx..%lx\n",
		    TAILQ_FIRST(rlist)->phys_addr,
		    TAILQ_LAST(rlist, pglist)->phys_addr);
#endif
	return (error);
}
392
393 int
394 uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok)
395 psize_t size;
396 paddr_t low, high, alignment, boundary;
397 struct pglist *rlist;
398 int nsegs, waitok;
399 {
400 int res;
401
402 KASSERT((alignment & (alignment - 1)) == 0);
403 KASSERT((boundary & (boundary - 1)) == 0);
404
405 /*
406 * Our allocations are always page granularity, so our alignment
407 * must be, too.
408 */
409 if (alignment < PAGE_SIZE)
410 alignment = PAGE_SIZE;
411 size = round_page(size);
412 low = roundup(low, alignment);
413
414 TAILQ_INIT(rlist);
415
416 if ((nsegs < size / PAGE_SIZE) || (alignment != PAGE_SIZE)
417 || (boundary != 0))
418 res = uvm_pglistalloc_contig(size, low, high, alignment,
419 boundary, rlist);
420 else
421 res = uvm_pglistalloc_simple(size, low, high, rlist, waitok);
422
423 return (res);
424 }
425
426 /*
427 * uvm_pglistfree: free a list of pages
428 *
429 * => pages should already be unmapped
430 */
431
432 void
433 uvm_pglistfree(list)
434 struct pglist *list;
435 {
436 struct vm_page *pg;
437 int s;
438
439 /*
440 * Lock the free list and free each page.
441 */
442
443 s = uvm_lock_fpageq();
444 while ((pg = TAILQ_FIRST(list)) != NULL) {
445 KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
446 TAILQ_REMOVE(list, pg, pageq);
447 pg->pqflags = PQ_FREE;
448 TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)].
449 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
450 pgfl_queues[PGFL_UNKNOWN], pg, pageq);
451 uvmexp.free++;
452 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
453 uvm.page_idle_zero = vm_page_zero_enable;
454 STAT_DECR(uvm_pglistalloc_npages);
455 }
456 uvm_unlock_fpageq(s);
457 }
458