uvm_bio.c revision 1.39 1 /* $NetBSD: uvm_bio.c,v 1.39 2005/06/27 02:19:48 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1998 Chuck Silvers.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32 /*
33 * uvm_bio.c: buffered i/o object mapping cache
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.39 2005/06/27 02:19:48 thorpej Exp $");
38
39 #include "opt_uvmhist.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45
46 #include <uvm/uvm.h>
47
48 /*
49 * global data structures
50 */
51
52 /*
53 * local functions
54 */
55
56 static int ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
57 int, int, vm_fault_t, vm_prot_t, int);
58 static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t);
59
60 /*
61 * local data structures
62 */
63
/*
 * UBC_HASH: index of the hash chain for an (object, offset) pair.
 * The object pointer (shifted right by 8) is added to the page number
 * of the offset and the sum is masked with ubc_object.hashmask.
 */
#define UBC_HASH(uobj, offset)						\
	(((((u_long)(uobj)) >> 8) +					\
	    (((u_long)(offset)) >> PAGE_SHIFT)) &			\
	    ubc_object.hashmask)

/*
 * UBC_QUEUE: address of the inactive queue head for an offset.
 * The window number of the offset is masked with (UBC_NQUEUES - 1).
 */
#define UBC_QUEUE(offset)						\
	(&ubc_object.inactive[(((u_long)(offset)) >> ubc_winshift) &	\
	    (UBC_NQUEUES - 1)])

/*
 * UBC_UMAP_ADDR: kernel virtual address of the window that belongs to
 * the ubc_map pointed to by u (its index in ubc_object.umap scaled by
 * the window size, added to ubc_object.kva).
 */
#define UBC_UMAP_ADDR(u)						\
	((vaddr_t)(ubc_object.kva +					\
	    (((u) - ubc_object.umap) << ubc_winshift)))

/*
 * bits kept in ubc_map.flags
 */
#define UMAP_PAGES_LOCKED	0x0001	/* set by ubc_alloc(UBC_FAULTBUSY) */
#define UMAP_MAPPING_CACHED	0x0002	/* pmap entries left by ubc_release() */
78
79 struct ubc_map
80 {
81 struct uvm_object * uobj; /* mapped object */
82 voff_t offset; /* offset into uobj */
83 voff_t writeoff; /* write offset */
84 vsize_t writelen; /* write len */
85 int refcount; /* refcount on mapping */
86 int flags; /* extra state */
87
88 LIST_ENTRY(ubc_map) hash; /* hash table */
89 TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */
90 };
91
92 static struct ubc_object
93 {
94 struct uvm_object uobj; /* glue for uvm_map() */
95 char *kva; /* where ubc_object is mapped */
96 struct ubc_map *umap; /* array of ubc_map's */
97
98 LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */
99 u_long hashmask; /* mask for hashtable */
100
101 TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive;
102 /* inactive queues for ubc_map's */
103
104 } ubc_object;
105
106 struct uvm_pagerops ubc_pager =
107 {
108 NULL, /* init */
109 NULL, /* reference */
110 NULL, /* detach */
111 ubc_fault, /* fault */
112 /* ... rest are NULL */
113 };
114
115 int ubc_nwins = UBC_NWINS;
116 int ubc_winshift = UBC_WINSHIFT;
117 int ubc_winsize;
118 #if defined(PMAP_PREFER)
119 int ubc_nqueues;
120 #define UBC_NQUEUES ubc_nqueues
121 #else
122 #define UBC_NQUEUES 1
123 #endif
124
125 /*
126 * ubc_init
127 *
128 * init pager private data structures.
129 */
130
131 void
132 ubc_init(void)
133 {
134 struct ubc_map *umap;
135 vaddr_t va;
136 int i;
137
138 /*
139 * Make sure ubc_winshift is sane.
140 */
141 if (ubc_winshift < PAGE_SHIFT)
142 ubc_winshift = PAGE_SHIFT;
143
144 /*
145 * init ubc_object.
146 * alloc and init ubc_map's.
147 * init inactive queues.
148 * alloc and init hashtable.
149 * map in ubc_object.
150 */
151
152 UVM_OBJ_INIT(&ubc_object.uobj, &ubc_pager, UVM_OBJ_KERN);
153
154 ubc_object.umap = malloc(ubc_nwins * sizeof(struct ubc_map),
155 M_TEMP, M_NOWAIT);
156 if (ubc_object.umap == NULL)
157 panic("ubc_init: failed to allocate ubc_map");
158 memset(ubc_object.umap, 0, ubc_nwins * sizeof(struct ubc_map));
159
160 if (ubc_winshift < PAGE_SHIFT) {
161 ubc_winshift = PAGE_SHIFT;
162 }
163 va = (vaddr_t)1L;
164 #ifdef PMAP_PREFER
165 PMAP_PREFER(0, &va, 0, 0); /* kernel is never topdown */
166 ubc_nqueues = va >> ubc_winshift;
167 if (ubc_nqueues == 0) {
168 ubc_nqueues = 1;
169 }
170 #endif
171 ubc_winsize = 1 << ubc_winshift;
172 ubc_object.inactive = malloc(UBC_NQUEUES *
173 sizeof(struct ubc_inactive_head), M_TEMP, M_NOWAIT);
174 if (ubc_object.inactive == NULL)
175 panic("ubc_init: failed to allocate inactive queue heads");
176 for (i = 0; i < UBC_NQUEUES; i++) {
177 TAILQ_INIT(&ubc_object.inactive[i]);
178 }
179 for (i = 0; i < ubc_nwins; i++) {
180 umap = &ubc_object.umap[i];
181 TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
182 umap, inactive);
183 }
184
185 ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, M_TEMP, M_NOWAIT,
186 &ubc_object.hashmask);
187 for (i = 0; i <= ubc_object.hashmask; i++) {
188 LIST_INIT(&ubc_object.hash[i]);
189 }
190
191 if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva,
192 ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va,
193 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
194 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
195 panic("ubc_init: failed to map ubc_object");
196 }
197 UVMHIST_INIT(ubchist, 300);
198 }
199
200 /*
201 * ubc_fault: fault routine for ubc mapping
202 */
203
204 static int
205 ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2,
206 int ign3, int ign4, vm_fault_t fault_type, vm_prot_t access_type,
207 int flags)
208 {
209 struct uvm_object *uobj;
210 struct ubc_map *umap;
211 vaddr_t va, eva, ubc_offset, slot_offset;
212 int i, error, npages;
213 struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT], *pg;
214 vm_prot_t prot;
215 UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist);
216
217 /*
218 * no need to try with PGO_LOCKED...
219 * we don't need to have the map locked since we know that
220 * no one will mess with it until our reference is released.
221 */
222
223 if (flags & PGO_LOCKED) {
224 uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL);
225 flags &= ~PGO_LOCKED;
226 }
227
228 va = ufi->orig_rvaddr;
229 ubc_offset = va - (vaddr_t)ubc_object.kva;
230 umap = &ubc_object.umap[ubc_offset >> ubc_winshift];
231 KASSERT(umap->refcount != 0);
232 slot_offset = ubc_offset & (ubc_winsize - 1);
233
234 /*
235 * some platforms cannot write to individual bytes atomically, so
236 * software has to do read/modify/write of larger quantities instead.
237 * this means that the access_type for "write" operations
238 * can be VM_PROT_READ, which confuses us mightily.
239 *
240 * deal with this by resetting access_type based on the info
241 * that ubc_alloc() stores for us.
242 */
243
244 access_type = umap->writelen ? VM_PROT_WRITE : VM_PROT_READ;
245 UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx access_type %d",
246 va, ubc_offset, access_type, 0);
247
248 #ifdef DIAGNOSTIC
249 if ((access_type & VM_PROT_WRITE) != 0) {
250 if (slot_offset < trunc_page(umap->writeoff) ||
251 umap->writeoff + umap->writelen <= slot_offset) {
252 panic("ubc_fault: out of range write");
253 }
254 }
255 #endif
256
257 /* no umap locking needed since we have a ref on the umap */
258 uobj = umap->uobj;
259
260 if ((access_type & VM_PROT_WRITE) == 0) {
261 npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT;
262 } else {
263 npages = (round_page(umap->offset + umap->writeoff +
264 umap->writelen) - (umap->offset + slot_offset))
265 >> PAGE_SHIFT;
266 flags |= PGO_PASTEOF;
267 }
268
269 again:
270 memset(pgs, 0, sizeof (pgs));
271 simple_lock(&uobj->vmobjlock);
272
273 UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x ",
274 slot_offset, umap->writeoff, umap->writelen, 0);
275 UVMHIST_LOG(ubchist, "getpages uobj %p offset 0x%x npages %d",
276 uobj, umap->offset + slot_offset, npages, 0);
277
278 error = (*uobj->pgops->pgo_get)(uobj, umap->offset + slot_offset, pgs,
279 &npages, 0, access_type, 0, flags);
280 UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages, 0,
281 0);
282
283 if (error == EAGAIN) {
284 tsleep(&lbolt, PVM, "ubc_fault", 0);
285 goto again;
286 }
287 if (error) {
288 return error;
289 }
290
291 va = ufi->orig_rvaddr;
292 eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);
293
294 UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0, 0);
295 simple_lock(&uobj->vmobjlock);
296 uvm_lock_pageq();
297 for (i = 0; va < eva; i++, va += PAGE_SIZE) {
298
299 /*
300 * for virtually-indexed, virtually-tagged caches we should
301 * avoid creating writable mappings when we don't absolutely
302 * need them, since the "compatible alias" trick doesn't work
303 * on such caches. otherwise, we can always map the pages
304 * writable.
305 */
306
307 #ifdef PMAP_CACHE_VIVT
308 prot = VM_PROT_READ | access_type;
309 #else
310 prot = VM_PROT_READ | VM_PROT_WRITE;
311 #endif
312 UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i], 0, 0);
313 pg = pgs[i];
314
315 if (pg == NULL || pg == PGO_DONTCARE) {
316 continue;
317 }
318 if (pg->flags & PG_WANTED) {
319 wakeup(pg);
320 }
321 KASSERT((pg->flags & PG_FAKE) == 0);
322 if (pg->flags & PG_RELEASED) {
323 uvm_pagefree(pg);
324 continue;
325 }
326 if (pg->loan_count != 0) {
327
328 /*
329 * avoid unneeded loan break if possible.
330 */
331
332 if ((access_type & VM_PROT_WRITE) == 0)
333 prot &= ~VM_PROT_WRITE;
334
335 if (prot & VM_PROT_WRITE) {
336 uvm_unlock_pageq();
337 pg = uvm_loanbreak(pg);
338 uvm_lock_pageq();
339 if (pg == NULL)
340 continue; /* will re-fault */
341 }
342 }
343 KASSERT(access_type == VM_PROT_READ ||
344 (pg->flags & PG_RDONLY) == 0);
345 pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
346 (pg->flags & PG_RDONLY) ? prot & ~VM_PROT_WRITE : prot,
347 access_type);
348 uvm_pageactivate(pg);
349 pg->flags &= ~(PG_BUSY);
350 UVM_PAGE_OWN(pg, NULL);
351 }
352 uvm_unlock_pageq();
353 simple_unlock(&uobj->vmobjlock);
354 pmap_update(ufi->orig_map->pmap);
355 return 0;
356 }
357
358 /*
359 * local functions
360 */
361
362 static struct ubc_map *
363 ubc_find_mapping(struct uvm_object *uobj, voff_t offset)
364 {
365 struct ubc_map *umap;
366
367 LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
368 if (umap->uobj == uobj && umap->offset == offset) {
369 return umap;
370 }
371 }
372 return NULL;
373 }
374
375
376 /*
377 * ubc interface functions
378 */
379
380 /*
381 * ubc_alloc: allocate a file mapping window
382 */
383
384 void *
385 ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int flags)
386 {
387 vaddr_t slot_offset, va;
388 struct ubc_map *umap;
389 voff_t umap_offset;
390 int error;
391 UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist);
392
393 UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx",
394 uobj, offset, *lenp, 0);
395
396 KASSERT(*lenp > 0);
397 umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
398 slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
399 *lenp = MIN(*lenp, ubc_winsize - slot_offset);
400
401 /*
402 * the object is always locked here, so we don't need to add a ref.
403 */
404
405 again:
406 simple_lock(&ubc_object.uobj.vmobjlock);
407 umap = ubc_find_mapping(uobj, umap_offset);
408 if (umap == NULL) {
409 umap = TAILQ_FIRST(UBC_QUEUE(offset));
410 if (umap == NULL) {
411 simple_unlock(&ubc_object.uobj.vmobjlock);
412 tsleep(&lbolt, PVM, "ubc_alloc", 0);
413 goto again;
414 }
415
416 /*
417 * remove from old hash (if any), add to new hash.
418 */
419
420 if (umap->uobj != NULL) {
421 LIST_REMOVE(umap, hash);
422 }
423 umap->uobj = uobj;
424 umap->offset = umap_offset;
425 LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
426 umap, hash);
427 va = UBC_UMAP_ADDR(umap);
428 if (umap->flags & UMAP_MAPPING_CACHED) {
429 umap->flags &= ~UMAP_MAPPING_CACHED;
430 pmap_remove(pmap_kernel(), va, va + ubc_winsize);
431 pmap_update(pmap_kernel());
432 }
433 } else {
434 va = UBC_UMAP_ADDR(umap);
435 }
436
437 if (umap->refcount == 0) {
438 TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
439 }
440
441 #ifdef DIAGNOSTIC
442 if ((flags & UBC_WRITE) && (umap->writeoff || umap->writelen)) {
443 panic("ubc_alloc: concurrent writes uobj %p", uobj);
444 }
445 #endif
446 if (flags & UBC_WRITE) {
447 umap->writeoff = slot_offset;
448 umap->writelen = *lenp;
449 }
450
451 umap->refcount++;
452 simple_unlock(&ubc_object.uobj.vmobjlock);
453 UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x",
454 umap, umap->refcount, va, flags);
455
456 if (flags & UBC_FAULTBUSY) {
457 int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT;
458 struct vm_page *pgs[npages];
459 int gpflags = PGO_SYNCIO|PGO_OVERWRITE|PGO_PASTEOF;
460 int i;
461 KDASSERT(flags & UBC_WRITE);
462
463 if (umap->flags & UMAP_MAPPING_CACHED) {
464 umap->flags &= ~UMAP_MAPPING_CACHED;
465 pmap_remove(pmap_kernel(), va, va + ubc_winsize);
466 }
467 memset(pgs, 0, sizeof(pgs));
468 simple_lock(&uobj->vmobjlock);
469 error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs,
470 &npages, 0, VM_PROT_READ | VM_PROT_WRITE, 0, gpflags);
471 UVMHIST_LOG(ubchist, "faultbusy getpages %d", error, 0, 0, 0);
472 if (error) {
473 goto out;
474 }
475 for (i = 0; i < npages; i++) {
476 pmap_kenter_pa(va + slot_offset + (i << PAGE_SHIFT),
477 VM_PAGE_TO_PHYS(pgs[i]),
478 VM_PROT_READ | VM_PROT_WRITE);
479 }
480 pmap_update(pmap_kernel());
481 umap->flags |= UMAP_PAGES_LOCKED;
482 }
483
484 out:
485 return (void *)(va + slot_offset);
486 }
487
488 /*
489 * ubc_release: free a file mapping window.
490 */
491
492 void
493 ubc_release(void *va, int flags)
494 {
495 struct ubc_map *umap;
496 struct uvm_object *uobj;
497 vaddr_t umapva;
498 boolean_t unmapped;
499 UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
500
501 UVMHIST_LOG(ubchist, "va %p", va, 0, 0, 0);
502 umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
503 umapva = UBC_UMAP_ADDR(umap);
504 uobj = umap->uobj;
505 KASSERT(uobj != NULL);
506
507 if (umap->flags & UMAP_PAGES_LOCKED) {
508 int slot_offset = umap->writeoff;
509 int endoff = umap->writeoff + umap->writelen;
510 int zerolen = round_page(endoff) - endoff;
511 int npages = (int)(round_page(umap->writeoff + umap->writelen)
512 - trunc_page(umap->writeoff)) >> PAGE_SHIFT;
513 struct vm_page *pgs[npages];
514 paddr_t pa;
515 int i;
516 boolean_t rv;
517
518 if (zerolen) {
519 memset((char *)umapva + endoff, 0, zerolen);
520 }
521 umap->flags &= ~UMAP_PAGES_LOCKED;
522 uvm_lock_pageq();
523 for (i = 0; i < npages; i++) {
524 rv = pmap_extract(pmap_kernel(),
525 umapva + slot_offset + (i << PAGE_SHIFT), &pa);
526 KASSERT(rv);
527 pgs[i] = PHYS_TO_VM_PAGE(pa);
528 pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN);
529 KASSERT(pgs[i]->loan_count == 0);
530 uvm_pageactivate(pgs[i]);
531 }
532 uvm_unlock_pageq();
533 pmap_kremove(umapva, ubc_winsize);
534 pmap_update(pmap_kernel());
535 simple_lock(&uobj->vmobjlock);
536 uvm_page_unbusy(pgs, npages);
537 simple_unlock(&uobj->vmobjlock);
538 unmapped = TRUE;
539 } else {
540 unmapped = FALSE;
541 }
542
543 simple_lock(&ubc_object.uobj.vmobjlock);
544 umap->writeoff = 0;
545 umap->writelen = 0;
546 umap->refcount--;
547 if (umap->refcount == 0) {
548 if (flags & UBC_UNMAP) {
549
550 /*
551 * Invalidate any cached mappings if requested.
552 * This is typically used to avoid leaving
553 * incompatible cache aliases around indefinitely.
554 */
555
556 pmap_remove(pmap_kernel(), umapva,
557 umapva + ubc_winsize);
558 umap->flags &= ~UMAP_MAPPING_CACHED;
559 pmap_update(pmap_kernel());
560 LIST_REMOVE(umap, hash);
561 umap->uobj = NULL;
562 TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
563 inactive);
564 } else {
565 if (!unmapped) {
566 umap->flags |= UMAP_MAPPING_CACHED;
567 }
568 TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
569 inactive);
570 }
571 }
572 UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount, 0, 0);
573 simple_unlock(&ubc_object.uobj.vmobjlock);
574 }
575
576
#if 0 /* notused */
/*
 * ubc_flush: remove a range of mappings from the ubc mapping cache.
 *
 * => uobj: object whose windows are to be dropped.
 * => start, end: windows with start <= offset < end are affected;
 *    end == 0 means no upper bound.
 * => windows that still have references are skipped.
 */

void
ubc_flush(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct ubc_map *umap;
	vaddr_t va;
	int idx;
	UVMHIST_FUNC("ubc_flush"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx",
	    uobj, start, end, 0);

	simple_lock(&ubc_object.uobj.vmobjlock);
	for (idx = 0; idx < ubc_nwins; idx++) {
		umap = &ubc_object.umap[idx];

		/* other object, or still in use */
		if (umap->uobj != uobj || umap->refcount > 0)
			continue;

		/* outside of the requested range */
		if (umap->offset < start)
			continue;
		if (end != 0 && umap->offset >= end)
			continue;

		/*
		 * remove from hash,
		 * move to head of inactive queue.
		 */

		va = UBC_UMAP_ADDR(umap);
		pmap_remove(pmap_kernel(), va, va + ubc_winsize);

		LIST_REMOVE(umap, hash);
		umap->uobj = NULL;
		TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive);
		TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive);
	}
	pmap_update(pmap_kernel());
	simple_unlock(&ubc_object.uobj.vmobjlock);
}
#endif /* notused */
621