1 /*	$NetBSD: uvm_aobj.c,v 1.99.4.2 2009/05/04 08:14:39 yamt Exp $	*/
2
3 /*
4 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
5 * Washington University.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by Charles D. Cranor and
19 * Washington University.
20 * 4. The name of the author may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
35 */
36 /*
37 * uvm_aobj.c: anonymous memory uvm_object pager
38 *
39 * author: Chuck Silvers <chuq (at) chuq.com>
40 * started: Jan-1998
41 *
42 * - design mostly from Chuck Cranor
43 */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.99.4.2 2009/05/04 08:14:39 yamt Exp $");
47
48 #include "opt_uvmhist.h"
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/proc.h>
53 #include <sys/kernel.h>
54 #include <sys/kmem.h>
55 #include <sys/pool.h>
56
57 #include <uvm/uvm.h>
58
59 /*
60 * an aobj manages anonymous-memory backed uvm_objects. in addition
61 * to keeping the list of resident pages, it also keeps a list of
62 * allocated swap blocks. depending on the size of the aobj this list
63 * of allocated swap blocks is either stored in an array (small objects)
64 * or in a hash table (large objects).
65 */
66
67 /*
68 * local structures
69 */
70
71 /*
72 * for hash tables, we break the address space of the aobj into blocks
73 * of UAO_SWHASH_CLUSTER_SIZE pages. we require the cluster size to
74 * be a power of two.
75 */
76
77 #define UAO_SWHASH_CLUSTER_SHIFT 4
78 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
79
80 /* get the "tag" for this page index */
81 #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
82 ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
83
84 #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
85 ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))
86
87 /* given an ELT and a page index, find the swap slot */
88 #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
89 ((ELT)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX)])
90
91 /* given an ELT, return its pageidx base */
92 #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
93 ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
94
95 /*
96 * the swhash hash function
97 */
98
99 #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
100 (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
101 & (AOBJ)->u_swhashmask)])
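/*
 * for example, with UAO_SWHASH_CLUSTER_SHIFT == 4, page index 0x123
 * has tag 0x12 (0x123 >> 4) and occupies slots[3] (0x123 & 0xf) of
 * its element, and that element hangs off the bucket
 * u_swhash[0x12 & u_swhashmask].
 */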
102
103 /*
104  * the swhash threshold determines if we will use an array or a
105 * hash table to store the list of allocated swap blocks.
106 */
107
108 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
109 #define UAO_USES_SWHASH(AOBJ) \
110 ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD) /* use hash? */
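/*
 * concretely, UAO_SWHASH_THRESHOLD is 16 * 4 = 64 pages, so an aobj
 * larger than 64 pages (256 kilobytes if PAGE_SIZE is 4k, which is an
 * architecture-dependent assumption) keeps its swap slots in the hash
 * table; smaller aobjs use the flat u_swslots array.
 */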
111
112 /*
113 * the number of buckets in a swhash, with an upper bound
114 */
115
116 #define UAO_SWHASH_MAXBUCKETS 256
117 #define UAO_SWHASH_BUCKETS(AOBJ) \
118 (MIN((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
119 UAO_SWHASH_MAXBUCKETS))
120
121 /*
122 * uao_swhash_elt: when a hash table is being used, this structure defines
123 * the format of an entry in the bucket list.
124 */
125
126 struct uao_swhash_elt {
127 LIST_ENTRY(uao_swhash_elt) list; /* the hash list */
128 voff_t tag; /* our 'tag' */
129 int count; /* our number of active slots */
130 int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */
131 };
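/*
 * "count" tracks how many of the element's slots are non-zero; when it
 * drops to zero the element is unlinked from its bucket and returned to
 * uao_swhash_elt_pool (see uao_set_swslot() and uao_dropswap_range1()).
 */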
132
133 /*
134 * uao_swhash: the swap hash table structure
135 */
136
137 LIST_HEAD(uao_swhash, uao_swhash_elt);
138
139 /*
140 * uao_swhash_elt_pool: pool of uao_swhash_elt structures
141 * NOTE: Pages for this pool must not come from a pageable kernel map!
142 */
143 static POOL_INIT(uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0, 0, 0,
144 "uaoeltpl", NULL, IPL_VM);
145
146 static struct pool_cache uvm_aobj_cache;
147
148 /*
149 * uvm_aobj: the actual anon-backed uvm_object
150 *
151 * => the uvm_object is at the top of the structure, this allows
152 * (struct uvm_aobj *) == (struct uvm_object *)
153 * => only one of u_swslots and u_swhash is used in any given aobj
154 */
155
156 struct uvm_aobj {
157 struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
158 pgoff_t u_pages; /* number of pages in entire object */
159 int u_flags; /* the flags (see uvm_aobj.h) */
160 int *u_swslots; /* array of offset->swapslot mappings */
161 /*
162 * hashtable of offset->swapslot mappings
163 * (u_swhash is an array of bucket heads)
164 */
165 struct uao_swhash *u_swhash;
166 u_long u_swhashmask; /* mask for hashtable */
167 LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */
168 };
169
170 /*
171 * local functions
172 */
173
174 static void uao_free(struct uvm_aobj *);
175 static int uao_get(struct uvm_object *, voff_t, struct vm_page **,
176 int *, int, vm_prot_t, int, int);
177 static int uao_put(struct uvm_object *, voff_t, voff_t, int);
178
179 static void uao_detach_locked(struct uvm_object *);
180 static void uao_reference_locked(struct uvm_object *);
181
182 #if defined(VMSWAP)
183 static struct uao_swhash_elt *uao_find_swhash_elt
184 (struct uvm_aobj *, int, bool);
185
186 static bool uao_pagein(struct uvm_aobj *, int, int);
187 static bool uao_pagein_page(struct uvm_aobj *, int);
188 static void uao_dropswap_range1(struct uvm_aobj *, voff_t, voff_t);
189 #endif /* defined(VMSWAP) */
190
191 /*
192 * aobj_pager
193 *
194 * note that some functions (e.g. put) are handled elsewhere
195 */
196
197 const struct uvm_pagerops aobj_pager = {
198 .pgo_reference = uao_reference,
199 .pgo_detach = uao_detach,
200 .pgo_get = uao_get,
201 .pgo_put = uao_put,
202 };
203
204 /*
205 * uao_list: global list of active aobjs, locked by uao_list_lock
206 */
207
208 static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
209 static kmutex_t uao_list_lock;
210
211 /*
212 * functions
213 */
214
215 /*
216 * hash table/array related functions
217 */
218
219 #if defined(VMSWAP)
220
221 /*
222 * uao_find_swhash_elt: find (or create) a hash table entry for a page
223 * offset.
224 *
225 * => the object should be locked by the caller
226 */
227
228 static struct uao_swhash_elt *
229 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
230 {
231 struct uao_swhash *swhash;
232 struct uao_swhash_elt *elt;
233 voff_t page_tag;
234
235 swhash = UAO_SWHASH_HASH(aobj, pageidx);
236 page_tag = UAO_SWHASH_ELT_TAG(pageidx);
237
238 /*
239 * now search the bucket for the requested tag
240 */
241
242 LIST_FOREACH(elt, swhash, list) {
243 if (elt->tag == page_tag) {
244 return elt;
245 }
246 }
247 if (!create) {
248 return NULL;
249 }
250
251 /*
252 * allocate a new entry for the bucket and init/insert it in
253 */
254
255 elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
256 if (elt == NULL) {
257 return NULL;
258 }
259 LIST_INSERT_HEAD(swhash, elt, list);
260 elt->tag = page_tag;
261 elt->count = 0;
262 memset(elt->slots, 0, sizeof(elt->slots));
263 return elt;
264 }
265
266 /*
267 * uao_find_swslot: find the swap slot number for an aobj/pageidx
268 *
269 * => object must be locked by caller
270 */
271
272 int
273 uao_find_swslot(struct uvm_object *uobj, int pageidx)
274 {
275 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
276 struct uao_swhash_elt *elt;
277
278 /*
279 * if noswap flag is set, then we never return a slot
280 */
281
282 if (aobj->u_flags & UAO_FLAG_NOSWAP)
283 return(0);
284
285 /*
286 * if hashing, look in hash table.
287 */
288
289 if (UAO_USES_SWHASH(aobj)) {
290 elt = uao_find_swhash_elt(aobj, pageidx, false);
291 if (elt)
292 return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
293 else
294 return(0);
295 }
296
297 /*
298 * otherwise, look in the array
299 */
300
301 return(aobj->u_swslots[pageidx]);
302 }
303
304 /*
305 * uao_set_swslot: set the swap slot for a page in an aobj.
306 *
307 * => setting a slot to zero frees the slot
308 * => object must be locked by caller
309 * => we return the old slot number, or -1 if we failed to allocate
310 * memory to record the new slot number
311 */
312
313 int
314 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
315 {
316 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
317 struct uao_swhash_elt *elt;
318 int oldslot;
319 UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
320 UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
321 aobj, pageidx, slot, 0);
322
323 /*
324 * if noswap flag is set, then we can't set a non-zero slot.
325 */
326
327 if (aobj->u_flags & UAO_FLAG_NOSWAP) {
328 if (slot == 0)
329 return(0);
330
331 printf("uao_set_swslot: uobj = %p\n", uobj);
332 panic("uao_set_swslot: NOSWAP object");
333 }
334
335 /*
336 * are we using a hash table? if so, add it in the hash.
337 */
338
339 if (UAO_USES_SWHASH(aobj)) {
340
341 /*
342 * Avoid allocating an entry just to free it again if
343 		 * the page had no swap slot in the first place, and
344 * we are freeing.
345 */
346
347 elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
348 if (elt == NULL) {
349 return slot ? -1 : 0;
350 }
351
352 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
353 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
354
355 /*
356 * now adjust the elt's reference counter and free it if we've
357 * dropped it to zero.
358 */
359
360 if (slot) {
361 if (oldslot == 0)
362 elt->count++;
363 } else {
364 if (oldslot)
365 elt->count--;
366
367 if (elt->count == 0) {
368 LIST_REMOVE(elt, list);
369 pool_put(&uao_swhash_elt_pool, elt);
370 }
371 }
372 } else {
373 /* we are using an array */
374 oldslot = aobj->u_swslots[pageidx];
375 aobj->u_swslots[pageidx] = slot;
376 }
377 return (oldslot);
378 }
379
380 #endif /* defined(VMSWAP) */
381
382 /*
383 * end of hash/array functions
384 */
385
386 /*
387 * uao_free: free all resources held by an aobj, and then free the aobj
388 *
389 * => the aobj should be dead
390 */
391
392 static void
393 uao_free(struct uvm_aobj *aobj)
394 {
395 int swpgonlydelta = 0;
396
397
398 #if defined(VMSWAP)
399 uao_dropswap_range1(aobj, 0, 0);
400 #endif /* defined(VMSWAP) */
401
402 mutex_exit(&aobj->u_obj.vmobjlock);
403
404 #if defined(VMSWAP)
405 if (UAO_USES_SWHASH(aobj)) {
406
407 /*
408 * free the hash table itself.
409 */
410
411 hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
412 } else {
413
414 /*
415 		 * free the array itself.
416 */
417
418 kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
419 }
420 #endif /* defined(VMSWAP) */
421
422 /*
423 * finally free the aobj itself
424 */
425
426 UVM_OBJ_DESTROY(&aobj->u_obj);
427 pool_cache_put(&uvm_aobj_cache, aobj);
428
429 /*
430 * adjust the counter of pages only in swap for all
431 * the swap slots we've freed.
432 */
433
434 if (swpgonlydelta > 0) {
435 mutex_enter(&uvm_swap_data_lock);
436 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
437 uvmexp.swpgonly -= swpgonlydelta;
438 mutex_exit(&uvm_swap_data_lock);
439 }
440 }
441
442 /*
443 * pager functions
444 */
445
446 /*
447 * uao_create: create an aobj of the given size and return its uvm_object.
448 *
449 * => for normal use, flags are always zero
450 * => for the kernel object, the flags are:
451 * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
452 * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ")
453 */
454
455 struct uvm_object *
456 uao_create(vsize_t size, int flags)
457 {
458 static struct uvm_aobj kernel_object_store;
459 static int kobj_alloced = 0;
460 pgoff_t pages = round_page(size) >> PAGE_SHIFT;
461 struct uvm_aobj *aobj;
462 int refs;
463
464 /*
465 	 * allocate a new aobj unless we are asked for the kernel object
466 */
467
468 if (flags & UAO_FLAG_KERNOBJ) {
469 KASSERT(!kobj_alloced);
470 aobj = &kernel_object_store;
471 aobj->u_pages = pages;
472 aobj->u_flags = UAO_FLAG_NOSWAP;
473 refs = UVM_OBJ_KERN;
474 kobj_alloced = UAO_FLAG_KERNOBJ;
475 } else if (flags & UAO_FLAG_KERNSWAP) {
476 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
477 aobj = &kernel_object_store;
478 kobj_alloced = UAO_FLAG_KERNSWAP;
479 refs = 0xdeadbeaf; /* XXX: gcc */
480 } else {
481 aobj = pool_cache_get(&uvm_aobj_cache, PR_WAITOK);
482 aobj->u_pages = pages;
483 aobj->u_flags = 0;
484 refs = 1;
485 }
486
487 /*
488 * allocate hash/array if necessary
489 *
490 	 * note: in the KERNSWAP case there is no need to worry about locking
491 	 * since we are still booting and should be the only thread around.
492 */
493
494 if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
495 #if defined(VMSWAP)
496 const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
497
498 /* allocate hash table or array depending on object size */
499 if (UAO_USES_SWHASH(aobj)) {
500 aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
501 HASH_LIST, kernswap ? false : true,
502 &aobj->u_swhashmask);
503 if (aobj->u_swhash == NULL)
504 panic("uao_create: hashinit swhash failed");
505 } else {
506 aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
507 kernswap ? KM_NOSLEEP : KM_SLEEP);
508 if (aobj->u_swslots == NULL)
509 panic("uao_create: malloc swslots failed");
510 }
511 #endif /* defined(VMSWAP) */
512
513 if (flags) {
514 aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
515 return(&aobj->u_obj);
516 }
517 }
518
519 /*
520 * init aobj fields
521 */
522
523 UVM_OBJ_INIT(&aobj->u_obj, &aobj_pager, refs);
524
525 /*
526 * now that aobj is ready, add it to the global list
527 */
528
529 mutex_enter(&uao_list_lock);
530 LIST_INSERT_HEAD(&uao_list, aobj, u_list);
531 mutex_exit(&uao_list_lock);
532 return(&aobj->u_obj);
533 }
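/*
 * a minimal usage sketch (illustrative, assuming the caller manages its
 * own mapping of the object):
 *
 *	struct uvm_object *uobj = uao_create(size, 0);
 *	... map the object or fetch its pages via pgo_get ...
 *	uao_detach(uobj);
 *
 * the single reference handed out by uao_create() is dropped by
 * uao_detach(), which frees the aobj once the count reaches zero.
 */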
534
535
536
537 /*
538 * uao_init: set up aobj pager subsystem
539 *
540 * => called at boot time from uvm_pager_init()
541 */
542
543 void
544 uao_init(void)
545 {
546 static int uao_initialized;
547
548 if (uao_initialized)
549 return;
550 uao_initialized = true;
551 LIST_INIT(&uao_list);
552 mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
553 pool_cache_bootstrap(&uvm_aobj_cache, sizeof(struct uvm_aobj), 0, 0,
554 0, "aobj", NULL, IPL_NONE, NULL, NULL, NULL);
555 }
556
557 /*
558 * uao_reference: add a ref to an aobj
559 *
560 * => aobj must be unlocked
561 * => just lock it and call the locked version
562 */
563
564 void
565 uao_reference(struct uvm_object *uobj)
566 {
567
568 /*
569 * kernel_object already has plenty of references, leave it alone.
570 */
571
572 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
573 return;
574
575 mutex_enter(&uobj->vmobjlock);
576 uao_reference_locked(uobj);
577 mutex_exit(&uobj->vmobjlock);
578 }
579
580 /*
581 * uao_reference_locked: add a ref to an aobj that is already locked
582 *
583 * => aobj must be locked
584 * this needs to be separate from the normal routine
585 * since sometimes we need to add a reference to an aobj when
586 * it's already locked.
587 */
588
589 static void
590 uao_reference_locked(struct uvm_object *uobj)
591 {
592 UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
593
594 /*
595 * kernel_object already has plenty of references, leave it alone.
596 */
597
598 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
599 return;
600
601 uobj->uo_refs++;
602 UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
603 uobj, uobj->uo_refs,0,0);
604 }
605
606 /*
607 * uao_detach: drop a reference to an aobj
608 *
609 * => aobj must be unlocked
610 * => just lock it and call the locked version
611 */
612
613 void
614 uao_detach(struct uvm_object *uobj)
615 {
616
617 /*
618 * detaching from kernel_object is a noop.
619 */
620
621 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
622 return;
623
624 mutex_enter(&uobj->vmobjlock);
625 uao_detach_locked(uobj);
626 }
627
628 /*
629 * uao_detach_locked: drop a reference to an aobj
630 *
631 * => aobj must be locked, and is unlocked (or freed) upon return.
632 * this needs to be separate from the normal routine
633 * since sometimes we need to detach from an aobj when
634 * it's already locked.
635 */
636
637 static void
638 uao_detach_locked(struct uvm_object *uobj)
639 {
640 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
641 struct vm_page *pg;
642 UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
643
644 /*
645 * detaching from kernel_object is a noop.
646 */
647
648 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
649 mutex_exit(&uobj->vmobjlock);
650 return;
651 }
652
653 UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0);
654 uobj->uo_refs--;
655 if (uobj->uo_refs) {
656 mutex_exit(&uobj->vmobjlock);
657 UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
658 return;
659 }
660
661 /*
662 * remove the aobj from the global list.
663 */
664
665 mutex_enter(&uao_list_lock);
666 LIST_REMOVE(aobj, u_list);
667 mutex_exit(&uao_list_lock);
668
669 /*
670 * free all the pages left in the aobj. for each page,
671 * when the page is no longer busy (and thus after any disk i/o that
672 * it's involved in is complete), release any swap resources and
673 * free the page itself.
674 */
675
676 mutex_enter(&uvm_pageqlock);
677 while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
678 pmap_page_protect(pg, VM_PROT_NONE);
679 if (pg->flags & PG_BUSY) {
680 pg->flags |= PG_WANTED;
681 mutex_exit(&uvm_pageqlock);
682 UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, false,
683 "uao_det", 0);
684 mutex_enter(&uobj->vmobjlock);
685 mutex_enter(&uvm_pageqlock);
686 continue;
687 }
688 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
689 uvm_pagefree(pg);
690 }
691 mutex_exit(&uvm_pageqlock);
692
693 /*
694 * finally, free the aobj itself.
695 */
696
697 uao_free(aobj);
698 }
699
700 /*
701 * uao_put: flush pages out of a uvm object
702 *
703 * => object should be locked by caller. we may _unlock_ the object
704 * if (and only if) we need to clean a page (PGO_CLEANIT).
705 * XXXJRT Currently, however, we don't. In the case of cleaning
706 * XXXJRT a page, we simply just deactivate it. Should probably
707 * XXXJRT handle this better, in the future (although "flushing"
708 * XXXJRT anonymous memory isn't terribly important).
709 * => if PGO_CLEANIT is not set, then we will neither unlock the object
710  *	nor block.
711 * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
712 * for flushing.
713 * => NOTE: we rely on the fact that the object's memq is a TAILQ and
714 * that new pages are inserted on the tail end of the list. thus,
715 * we can make a complete pass through the object in one go by starting
716 * at the head and working towards the tail (new pages are put in
717 * front of us).
718 * => NOTE: we are allowed to lock the page queues, so the caller
719 * must not be holding the lock on them [e.g. pagedaemon had
720 * better not call us with the queues locked]
721 * => we return 0 unless we encountered some sort of I/O error
722 * XXXJRT currently never happens, as we never directly initiate
723 * XXXJRT I/O
724 *
725 * note on page traversal:
726 * we can traverse the pages in an object either by going down the
727 * linked list in "uobj->memq", or we can go over the address range
728  *	page by page, doing hash table lookups for each address.  depending
729 * on how many pages are in the object it may be cheaper to do one
730 * or the other. we set "by_list" to true if we are using memq.
731 * if the cost of a hash lookup was equal to the cost of the list
732 * traversal we could compare the number of pages in the start->stop
733 * range to the total number of pages in the object. however, it
734 * seems that a hash table lookup is more expensive than the linked
735 * list traversal, so we multiply the number of pages in the
736 * start->stop range by a penalty which we define below.
737 */
738
739 static int
740 uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
741 {
742 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
743 struct vm_page *pg, *nextpg, curmp, endmp;
744 bool by_list;
745 voff_t curoff;
746 UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
747
748 KASSERT(mutex_owned(&uobj->vmobjlock));
749
750 curoff = 0;
751 if (flags & PGO_ALLPAGES) {
752 start = 0;
753 stop = aobj->u_pages << PAGE_SHIFT;
754 by_list = true; /* always go by the list */
755 } else {
756 start = trunc_page(start);
757 if (stop == 0) {
758 stop = aobj->u_pages << PAGE_SHIFT;
759 } else {
760 stop = round_page(stop);
761 }
762 if (stop > (aobj->u_pages << PAGE_SHIFT)) {
763 printf("uao_flush: strange, got an out of range "
764 "flush (fixed)\n");
765 stop = aobj->u_pages << PAGE_SHIFT;
766 }
767 by_list = (uobj->uo_npages <=
768 ((stop - start) >> PAGE_SHIFT) * UVM_PAGE_TREE_PENALTY);
769 }
770 UVMHIST_LOG(maphist,
771 " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
772 start, stop, by_list, flags);
773
774 /*
775 * Don't need to do any work here if we're not freeing
776 * or deactivating pages.
777 */
778
779 if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
780 mutex_exit(&uobj->vmobjlock);
781 return 0;
782 }
783
784 /*
785 * Initialize the marker pages. See the comment in
786 * genfs_putpages() also.
787 */
788
789 curmp.uobject = uobj;
790 curmp.offset = (voff_t)-1;
791 curmp.flags = PG_BUSY;
792 endmp.uobject = uobj;
793 endmp.offset = (voff_t)-1;
794 endmp.flags = PG_BUSY;
795
796 /*
797 	 * now do it.  note: we must update nextpg in the body of the loop or we
798 * will get stuck. we need to use nextpg if we'll traverse the list
799 * because we may free "pg" before doing the next loop.
800 */
801
802 if (by_list) {
803 TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
804 nextpg = TAILQ_FIRST(&uobj->memq);
805 uvm_lwp_hold(curlwp);
806 } else {
807 curoff = start;
808 nextpg = NULL; /* Quell compiler warning */
809 }
810
811 /* locked: uobj */
812 for (;;) {
813 if (by_list) {
814 pg = nextpg;
815 if (pg == &endmp)
816 break;
817 nextpg = TAILQ_NEXT(pg, listq.queue);
818 if (pg->offset < start || pg->offset >= stop)
819 continue;
820 } else {
821 if (curoff < stop) {
822 pg = uvm_pagelookup(uobj, curoff);
823 curoff += PAGE_SIZE;
824 } else
825 break;
826 if (pg == NULL)
827 continue;
828 }
829
830 /*
831 * wait and try again if the page is busy.
832 */
833
834 if (pg->flags & PG_BUSY) {
835 if (by_list) {
836 TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
837 }
838 pg->flags |= PG_WANTED;
839 UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
840 "uao_put", 0);
841 mutex_enter(&uobj->vmobjlock);
842 if (by_list) {
843 nextpg = TAILQ_NEXT(&curmp, listq.queue);
844 TAILQ_REMOVE(&uobj->memq, &curmp,
845 listq.queue);
846 } else
847 curoff -= PAGE_SIZE;
848 continue;
849 }
850
851 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
852
853 /*
854 * XXX In these first 3 cases, we always just
855 * XXX deactivate the page. We may want to
856 * XXX handle the different cases more specifically
857 * XXX in the future.
858 */
859
860 case PGO_CLEANIT|PGO_FREE:
861 case PGO_CLEANIT|PGO_DEACTIVATE:
862 case PGO_DEACTIVATE:
863 deactivate_it:
864 mutex_enter(&uvm_pageqlock);
865 /* skip the page if it's wired */
866 if (pg->wire_count == 0) {
867 uvm_pagedeactivate(pg);
868 }
869 mutex_exit(&uvm_pageqlock);
870 break;
871
872 case PGO_FREE:
873 /*
874 * If there are multiple references to
875 * the object, just deactivate the page.
876 */
877
878 if (uobj->uo_refs > 1)
879 goto deactivate_it;
880
881 /*
882 * free the swap slot and the page.
883 */
884
885 pmap_page_protect(pg, VM_PROT_NONE);
886
887 /*
888 * freeing swapslot here is not strictly necessary.
889 * however, leaving it here doesn't save much
890 * because we need to update swap accounting anyway.
891 */
892
893 uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
894 mutex_enter(&uvm_pageqlock);
895 uvm_pagefree(pg);
896 mutex_exit(&uvm_pageqlock);
897 break;
898
899 default:
900 panic("%s: impossible", __func__);
901 }
902 }
903 if (by_list) {
904 TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
905 uvm_lwp_rele(curlwp);
906 }
907 mutex_exit(&uobj->vmobjlock);
908 return 0;
909 }
910
911 /*
912 * uao_get: fetch me a page
913 *
914 * we have three cases:
915 * 1: page is resident -> just return the page.
916 * 2: page is zero-fill -> allocate a new page and zero it.
917 * 3: page is swapped out -> fetch the page from swap.
918 *
919 * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
920 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
921 * then we will need to return EBUSY.
922 *
923 * => prefer map unlocked (not required)
924 * => object must be locked! we will _unlock_ it before starting any I/O.
925 * => flags: PGO_ALLPAGES: get all of the pages
926 * PGO_LOCKED: fault data structures are locked
927 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
928 * => NOTE: caller must check for released pages!!
929 */
930
931 static int
932 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
933 int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
934 {
935 #if defined(VMSWAP)
936 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
937 #endif /* defined(VMSWAP) */
938 voff_t current_offset;
939 struct vm_page *ptmp = NULL; /* Quell compiler warning */
940 int lcv, gotpages, maxpages, swslot, pageidx;
941 bool done;
942 UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
943
944 UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
945 (struct uvm_aobj *)uobj, offset, flags,0);
946
947 /*
948 * get number of pages
949 */
950
951 maxpages = *npagesp;
952
953 /*
954 	 * step 1: handle the case where the fault data structures are locked.
955 */
956
957 if (flags & PGO_LOCKED) {
958
959 /*
960 * step 1a: get pages that are already resident. only do
961 * this if the data structures are locked (i.e. the first
962 * time through).
963 */
964
965 done = true; /* be optimistic */
966 gotpages = 0; /* # of pages we got so far */
967 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
968 lcv++, current_offset += PAGE_SIZE) {
969 /* do we care about this page? if not, skip it */
970 if (pps[lcv] == PGO_DONTCARE)
971 continue;
972 ptmp = uvm_pagelookup(uobj, current_offset);
973
974 /*
975 * if page is new, attempt to allocate the page,
976 * zero-fill'd.
977 */
978
979 if (ptmp == NULL && uao_find_swslot(&aobj->u_obj,
980 current_offset >> PAGE_SHIFT) == 0) {
981 ptmp = uvm_pagealloc(uobj, current_offset,
982 NULL, UVM_PGA_ZERO);
983 if (ptmp) {
984 /* new page */
985 ptmp->flags &= ~(PG_FAKE);
986 ptmp->pqflags |= PQ_AOBJ;
987 goto gotpage;
988 }
989 }
990
991 /*
992 			 * to be useful we must get a non-busy page
993 */
994
995 if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
996 if (lcv == centeridx ||
997 (flags & PGO_ALLPAGES) != 0)
998 /* need to do a wait or I/O! */
999 done = false;
1000 continue;
1001 }
1002
1003 /*
1004 * useful page: busy/lock it and plug it in our
1005 * result array
1006 */
1007
1008 /* caller must un-busy this page */
1009 ptmp->flags |= PG_BUSY;
1010 UVM_PAGE_OWN(ptmp, "uao_get1");
1011 gotpage:
1012 pps[lcv] = ptmp;
1013 gotpages++;
1014 }
1015
1016 /*
1017 		 * step 1b: now we've either done everything needed or we need
1018 * to unlock and do some waiting or I/O.
1019 */
1020
1021 UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
1022 *npagesp = gotpages;
1023 if (done)
1024 return 0;
1025 else
1026 return EBUSY;
1027 }
1028
1029 /*
1030 * step 2: get non-resident or busy pages.
1031 * object is locked. data structures are unlocked.
1032 */
1033
1034 if ((flags & PGO_SYNCIO) == 0) {
1035 goto done;
1036 }
1037
1038 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1039 lcv++, current_offset += PAGE_SIZE) {
1040
1041 /*
1042 * - skip over pages we've already gotten or don't want
1043 * - skip over pages we don't _have_ to get
1044 */
1045
1046 if (pps[lcv] != NULL ||
1047 (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1048 continue;
1049
1050 pageidx = current_offset >> PAGE_SHIFT;
1051
1052 /*
1053 * we have yet to locate the current page (pps[lcv]). we
1054 * first look for a page that is already at the current offset.
1055 * if we find a page, we check to see if it is busy or
1056 * released. if that is the case, then we sleep on the page
1057 * until it is no longer busy or released and repeat the lookup.
1058 * if the page we found is neither busy nor released, then we
1059 * busy it (so we own it) and plug it into pps[lcv]. this
1060 * 'break's the following while loop and indicates we are
1061 * ready to move on to the next page in the "lcv" loop above.
1062 *
1063 * if we exit the while loop with pps[lcv] still set to NULL,
1064 * then it means that we allocated a new busy/fake/clean page
1065 * ptmp in the object and we need to do I/O to fill in the data.
1066 */
1067
1068 /* top of "pps" while loop */
1069 while (pps[lcv] == NULL) {
1070 /* look for a resident page */
1071 ptmp = uvm_pagelookup(uobj, current_offset);
1072
1073 /* not resident? allocate one now (if we can) */
1074 if (ptmp == NULL) {
1075
1076 ptmp = uvm_pagealloc(uobj, current_offset,
1077 NULL, 0);
1078
1079 /* out of RAM? */
1080 if (ptmp == NULL) {
1081 mutex_exit(&uobj->vmobjlock);
1082 UVMHIST_LOG(pdhist,
1083 "sleeping, ptmp == NULL\n",0,0,0,0);
1084 uvm_wait("uao_getpage");
1085 mutex_enter(&uobj->vmobjlock);
1086 continue;
1087 }
1088
1089 /*
1090 * safe with PQ's unlocked: because we just
1091 * alloc'd the page
1092 */
1093
1094 ptmp->pqflags |= PQ_AOBJ;
1095
1096 /*
1097 * got new page ready for I/O. break pps while
1098 * loop. pps[lcv] is still NULL.
1099 */
1100
1101 break;
1102 }
1103
1104 /* page is there, see if we need to wait on it */
1105 if ((ptmp->flags & PG_BUSY) != 0) {
1106 ptmp->flags |= PG_WANTED;
1107 UVMHIST_LOG(pdhist,
1108 "sleeping, ptmp->flags 0x%x\n",
1109 ptmp->flags,0,0,0);
1110 UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
1111 false, "uao_get", 0);
1112 mutex_enter(&uobj->vmobjlock);
1113 continue;
1114 }
1115
1116 /*
1117 * if we get here then the page has become resident and
1118 * unbusy between steps 1 and 2. we busy it now (so we
1119 * own it) and set pps[lcv] (so that we exit the while
1120 * loop).
1121 */
1122
1123 /* we own it, caller must un-busy */
1124 ptmp->flags |= PG_BUSY;
1125 UVM_PAGE_OWN(ptmp, "uao_get2");
1126 pps[lcv] = ptmp;
1127 }
1128
1129 /*
1130 * if we own the valid page at the correct offset, pps[lcv] will
1131 * point to it. nothing more to do except go to the next page.
1132 */
1133
1134 if (pps[lcv])
1135 continue; /* next lcv */
1136
1137 /*
1138 * we have a "fake/busy/clean" page that we just allocated.
1139 * do the needed "i/o", either reading from swap or zeroing.
1140 */
1141
1142 swslot = uao_find_swslot(&aobj->u_obj, pageidx);
1143
1144 /*
1145 * just zero the page if there's nothing in swap.
1146 */
1147
1148 if (swslot == 0) {
1149
1150 /*
1151 * page hasn't existed before, just zero it.
1152 */
1153
1154 uvm_pagezero(ptmp);
1155 } else {
1156 #if defined(VMSWAP)
1157 int error;
1158
1159 UVMHIST_LOG(pdhist, "pagein from swslot %d",
1160 swslot, 0,0,0);
1161
1162 /*
1163 * page in the swapped-out page.
1164 * unlock object for i/o, relock when done.
1165 */
1166
1167 mutex_exit(&uobj->vmobjlock);
1168 error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1169 mutex_enter(&uobj->vmobjlock);
1170
1171 /*
1172 * I/O done. check for errors.
1173 */
1174
1175 if (error != 0) {
1176 UVMHIST_LOG(pdhist, "<- done (error=%d)",
1177 error,0,0,0);
1178 if (ptmp->flags & PG_WANTED)
1179 wakeup(ptmp);
1180
1181 /*
1182 * remove the swap slot from the aobj
1183 * and mark the aobj as having no real slot.
1184 * don't free the swap slot, thus preventing
1185 * it from being used again.
1186 */
1187
1188 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
1189 SWSLOT_BAD);
1190 if (swslot > 0) {
1191 uvm_swap_markbad(swslot, 1);
1192 }
1193
1194 mutex_enter(&uvm_pageqlock);
1195 uvm_pagefree(ptmp);
1196 mutex_exit(&uvm_pageqlock);
1197 mutex_exit(&uobj->vmobjlock);
1198 return error;
1199 }
1200 #else /* defined(VMSWAP) */
1201 panic("%s: pagein", __func__);
1202 #endif /* defined(VMSWAP) */
1203 }
1204
1205 if ((access_type & VM_PROT_WRITE) == 0) {
1206 ptmp->flags |= PG_CLEAN;
1207 pmap_clear_modify(ptmp);
1208 }
1209
1210 /*
1211 * we got the page! clear the fake flag (indicates valid
1212 * data now in page) and plug into our result array. note
1213 * that page is still busy.
1214 *
1215 * it is the callers job to:
1216 * => check if the page is released
1217 * => unbusy the page
1218 * => activate the page
1219 */
1220
1221 ptmp->flags &= ~PG_FAKE;
1222 pps[lcv] = ptmp;
1223 }
1224
1225 /*
1226 * finally, unlock object and return.
1227 */
1228
1229 done:
1230 mutex_exit(&uobj->vmobjlock);
1231 UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
1232 return 0;
1233 }
1234
1235 #if defined(VMSWAP)
1236
1237 /*
1238 * uao_dropswap: release any swap resources from this aobj page.
1239 *
1240 * => aobj must be locked or have a reference count of 0.
1241 */
1242
1243 void
1244 uao_dropswap(struct uvm_object *uobj, int pageidx)
1245 {
1246 int slot;
1247
1248 slot = uao_set_swslot(uobj, pageidx, 0);
1249 if (slot) {
1250 uvm_swap_free(slot, 1);
1251 }
1252 }
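/*
 * uao_dropswap() is the helper used when a resident page is being
 * freed; see uao_detach_locked() and the PGO_FREE case of uao_put()
 * for callers within this file.
 */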
1253
1254 /*
1255 * page in every page in every aobj that is paged-out to a range of swslots.
1256 *
1257 * => nothing should be locked.
1258 * => returns true if pagein was aborted due to lack of memory.
1259 */
1260
1261 bool
1262 uao_swap_off(int startslot, int endslot)
1263 {
1264 struct uvm_aobj *aobj, *nextaobj;
1265 bool rv;
1266
1267 /*
1268 * walk the list of all aobjs.
1269 */
1270
1271 restart:
1272 mutex_enter(&uao_list_lock);
1273 for (aobj = LIST_FIRST(&uao_list);
1274 aobj != NULL;
1275 aobj = nextaobj) {
1276
1277 /*
1278 * try to get the object lock, start all over if we fail.
1279 * most of the time we'll get the aobj lock,
1280 * so this should be a rare case.
1281 */
1282
1283 if (!mutex_tryenter(&aobj->u_obj.vmobjlock)) {
1284 mutex_exit(&uao_list_lock);
1285 /* XXX Better than yielding but inadequate. */
1286 kpause("livelock", false, 1, NULL);
1287 goto restart;
1288 }
1289
1290 /*
1291 * add a ref to the aobj so it doesn't disappear
1292 * while we're working.
1293 */
1294
1295 uao_reference_locked(&aobj->u_obj);
1296
1297 /*
1298 * now it's safe to unlock the uao list.
1299 */
1300
1301 mutex_exit(&uao_list_lock);
1302
1303 /*
1304 * page in any pages in the swslot range.
1305 * if there's an error, abort and return the error.
1306 */
1307
1308 rv = uao_pagein(aobj, startslot, endslot);
1309 if (rv) {
1310 uao_detach_locked(&aobj->u_obj);
1311 return rv;
1312 }
1313
1314 /*
1315 * we're done with this aobj.
1316 * relock the list and drop our ref on the aobj.
1317 */
1318
1319 mutex_enter(&uao_list_lock);
1320 nextaobj = LIST_NEXT(aobj, u_list);
1321 uao_detach_locked(&aobj->u_obj);
1322 }
1323
1324 /*
1325 * done with traversal, unlock the list
1326 */
1327 mutex_exit(&uao_list_lock);
1328 return false;
1329 }
1330
1331
1332 /*
1333 * page in any pages from aobj in the given range.
1334 *
1335 * => aobj must be locked and is returned locked.
1336 * => returns true if pagein was aborted due to lack of memory.
1337 */
1338 static bool
1339 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1340 {
1341 bool rv;
1342
1343 if (UAO_USES_SWHASH(aobj)) {
1344 struct uao_swhash_elt *elt;
1345 int buck;
1346
1347 restart:
1348 for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
1349 for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
1350 elt != NULL;
1351 elt = LIST_NEXT(elt, list)) {
1352 int i;
1353
1354 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1355 int slot = elt->slots[i];
1356
1357 /*
1358 * if the slot isn't in range, skip it.
1359 */
1360
1361 if (slot < startslot ||
1362 slot >= endslot) {
1363 continue;
1364 }
1365
1366 /*
1367 * process the page,
1368 					 * then start over on this object
1369 * since the swhash elt
1370 * may have been freed.
1371 */
1372
1373 rv = uao_pagein_page(aobj,
1374 UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1375 if (rv) {
1376 return rv;
1377 }
1378 goto restart;
1379 }
1380 }
1381 }
1382 } else {
1383 int i;
1384
1385 for (i = 0; i < aobj->u_pages; i++) {
1386 int slot = aobj->u_swslots[i];
1387
1388 /*
1389 * if the slot isn't in range, skip it
1390 */
1391
1392 if (slot < startslot || slot >= endslot) {
1393 continue;
1394 }
1395
1396 /*
1397 * process the page.
1398 */
1399
1400 rv = uao_pagein_page(aobj, i);
1401 if (rv) {
1402 return rv;
1403 }
1404 }
1405 }
1406
1407 return false;
1408 }
1409
1410 /*
1411 * page in a page from an aobj. used for swap_off.
1412 * returns true if pagein was aborted due to lack of memory.
1413 *
1414 * => aobj must be locked and is returned locked.
1415 */
1416
1417 static bool
1418 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1419 {
1420 struct vm_page *pg;
1421 int rv, npages;
1422
1423 pg = NULL;
1424 npages = 1;
1425 /* locked: aobj */
1426 rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
1427 &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, PGO_SYNCIO);
1428 /* unlocked: aobj */
1429
1430 /*
1431 * relock and finish up.
1432 */
1433
1434 mutex_enter(&aobj->u_obj.vmobjlock);
1435 switch (rv) {
1436 case 0:
1437 break;
1438
1439 case EIO:
1440 case ERESTART:
1441
1442 /*
1443 * nothing more to do on errors.
1444 * ERESTART can only mean that the anon was freed,
1445 * so again there's nothing to do.
1446 */
1447
1448 return false;
1449
1450 default:
1451 return true;
1452 }
1453
1454 /*
1455 * ok, we've got the page now.
1456 * mark it as dirty, clear its swslot and un-busy it.
1457 */
1458 uao_dropswap(&aobj->u_obj, pageidx);
1459
1460 /*
1461 * make sure it's on a page queue.
1462 */
1463 mutex_enter(&uvm_pageqlock);
1464 if (pg->wire_count == 0)
1465 uvm_pageenqueue(pg);
1466 mutex_exit(&uvm_pageqlock);
1467
1468 if (pg->flags & PG_WANTED) {
1469 wakeup(pg);
1470 }
1471 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE);
1472 UVM_PAGE_OWN(pg, NULL);
1473
1474 return false;
1475 }
1476
1477 /*
1478 * uao_dropswap_range: drop swapslots in the range.
1479 *
1480 * => aobj must be locked and is returned locked.
1481 * => start is inclusive. end is exclusive.
1482 */
1483
1484 void
1485 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
1486 {
1487 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1488
1489 KASSERT(mutex_owned(&uobj->vmobjlock));
1490
1491 uao_dropswap_range1(aobj, start, end);
1492 }
1493
1494 static void
1495 uao_dropswap_range1(struct uvm_aobj *aobj, voff_t start, voff_t end)
1496 {
1497 int swpgonlydelta = 0;
1498
1499 if (end == 0) {
1500 end = INT64_MAX;
1501 }
1502
1503 if (UAO_USES_SWHASH(aobj)) {
1504 int i, hashbuckets = aobj->u_swhashmask + 1;
1505 voff_t taghi;
1506 voff_t taglo;
1507
1508 taglo = UAO_SWHASH_ELT_TAG(start);
1509 taghi = UAO_SWHASH_ELT_TAG(end);
1510
1511 for (i = 0; i < hashbuckets; i++) {
1512 struct uao_swhash_elt *elt, *next;
1513
1514 for (elt = LIST_FIRST(&aobj->u_swhash[i]);
1515 elt != NULL;
1516 elt = next) {
1517 int startidx, endidx;
1518 int j;
1519
1520 next = LIST_NEXT(elt, list);
1521
1522 if (elt->tag < taglo || taghi < elt->tag) {
1523 continue;
1524 }
1525
1526 if (elt->tag == taglo) {
1527 startidx =
1528 UAO_SWHASH_ELT_PAGESLOT_IDX(start);
1529 } else {
1530 startidx = 0;
1531 }
1532
1533 if (elt->tag == taghi) {
1534 endidx =
1535 UAO_SWHASH_ELT_PAGESLOT_IDX(end);
1536 } else {
1537 endidx = UAO_SWHASH_CLUSTER_SIZE;
1538 }
1539
1540 for (j = startidx; j < endidx; j++) {
1541 int slot = elt->slots[j];
1542
1543 KASSERT(uvm_pagelookup(&aobj->u_obj,
1544 (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
1545 + j) << PAGE_SHIFT) == NULL);
1546 if (slot > 0) {
1547 uvm_swap_free(slot, 1);
1548 swpgonlydelta++;
1549 KASSERT(elt->count > 0);
1550 elt->slots[j] = 0;
1551 elt->count--;
1552 }
1553 }
1554
1555 if (elt->count == 0) {
1556 LIST_REMOVE(elt, list);
1557 pool_put(&uao_swhash_elt_pool, elt);
1558 }
1559 }
1560 }
1561 } else {
1562 int i;
1563
1564 if (aobj->u_pages < end) {
1565 end = aobj->u_pages;
1566 }
1567 for (i = start; i < end; i++) {
1568 int slot = aobj->u_swslots[i];
1569
1570 if (slot > 0) {
1571 uvm_swap_free(slot, 1);
1572 swpgonlydelta++;
1573 }
1574 }
1575 }
1576
1577 /*
1578 * adjust the counter of pages only in swap for all
1579 * the swap slots we've freed.
1580 */
1581
1582 if (swpgonlydelta > 0) {
1583 mutex_enter(&uvm_swap_data_lock);
1584 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1585 uvmexp.swpgonly -= swpgonlydelta;
1586 mutex_exit(&uvm_swap_data_lock);
1587 }
1588 }
1589
1590 #endif /* defined(VMSWAP) */
1591