/*	$NetBSD: uvm_bio.c,v 1.19 2001/09/28 11:59:55 chs Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_uvmhist.h"

/*
 * uvm_bio.c: buffered i/o vnode mapping cache
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>

#include <uvm/uvm.h>

/*
 * global data structures
 */

/*
 * local functions
 */

int	ubc_fault __P((struct uvm_faultinfo *, vaddr_t, struct vm_page **, int,
	    int, vm_fault_t, vm_prot_t, int));
struct ubc_map *ubc_find_mapping __P((struct uvm_object *, voff_t));

/*
 * local data structures
 */

#define UBC_HASH(uobj, offset) \
	(((((u_long)(uobj)) >> 8) + (((u_long)(offset)) >> PAGE_SHIFT)) & \
	    ubc_object.hashmask)

#define UBC_QUEUE(offset) \
	(&ubc_object.inactive[(((u_long)(offset)) >> ubc_winshift) & \
	    (UBC_NQUEUES - 1)])

#define UBC_UMAP_ADDR(u) \
	(vaddr_t)(ubc_object.kva + (((u) - ubc_object.umap) << ubc_winshift))
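
/*
 * UBC_HASH() mixes the object pointer (shifted to drop its low,
 * mostly-constant bits) with the page number of the offset to pick a
 * hash chain.  UBC_QUEUE() selects one of the UBC_NQUEUES inactive
 * queues by window number, so windows whose offsets fall in the same
 * alignment class are recycled together.  UBC_UMAP_ADDR() converts a
 * ubc_map back to the kernel virtual address of its window: window i
 * lives at ubc_object.kva + i * ubc_winsize.
 */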

#define UMAP_PAGES_LOCKED	0x0001	/* window pages busy (UBC_FAULTBUSY) */
#define UMAP_MAPPING_CACHED	0x0002	/* pmap may still map this window */

struct ubc_map
{
	struct uvm_object *	uobj;		/* mapped object */
	voff_t			offset;		/* offset into uobj */
	voff_t			writeoff;	/* overwrite offset */
	vsize_t			writelen;	/* overwrite len */
	int			refcount;	/* refcount on mapping */
	int			flags;		/* extra state */

	LIST_ENTRY(ubc_map)	hash;		/* hash table */
	TAILQ_ENTRY(ubc_map)	inactive;	/* inactive queue */
};

static struct ubc_object
{
	struct uvm_object uobj;		/* glue for uvm_map() */
	char *kva;			/* where ubc_object is mapped */
	struct ubc_map *umap;		/* array of ubc_map's */

	LIST_HEAD(, ubc_map) *hash;	/* hashtable for cached ubc_map's */
	u_long hashmask;		/* mask for hashtable */

	TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive;
					/* inactive queues for ubc_map's */

} ubc_object;
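
/*
 * ubc_object is a single kernel uvm_object whose virtual space is
 * carved into ubc_nwins fixed-size windows of ubc_winsize bytes each;
 * umap[i] records which (uobj, offset) pair window i currently maps.
 * Faults on the windows are resolved by ubc_fault() below.
 */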

struct uvm_pagerops ubc_pager =
{
	NULL,		/* init */
	NULL,		/* reference */
	NULL,		/* detach */
	ubc_fault,	/* fault */
	/* ... rest are NULL */
};
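
/*
 * Only the fault hook is needed: window mappings are created lazily
 * by ubc_fault() and torn down explicitly with pmap_remove() or
 * pmap_kremove(), so the pager has no init, reference or detach work.
 */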
114
115 int ubc_nwins = UBC_NWINS;
116 int ubc_winshift = UBC_WINSHIFT;
117 int ubc_winsize;
118 #ifdef PMAP_PREFER
119 int ubc_nqueues;
120 boolean_t ubc_release_unmap = FALSE;
121 #define UBC_NQUEUES ubc_nqueues
122 #define UBC_RELEASE_UNMAP ubc_release_unmap
123 #else
124 #define UBC_NQUEUES 1
125 #define UBC_RELEASE_UNMAP FALSE
126 #endif
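
/*
 * On pmaps with virtually-indexed caches, PMAP_PREFER() reports the
 * preferred virtual-address alignment.  ubc then keeps one inactive
 * queue per alignment class so that a window for a given file offset
 * is recycled at a kernel VA of the matching cache color, and it
 * unmaps windows of executable images eagerly at release time (see
 * the UBC_RELEASE_UNMAP code in ubc_release()).
 */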

/*
 * ubc_init
 *
 * init pager private data structures.
 */

void
ubc_init(void)
{
	struct ubc_map *umap;
	vaddr_t va;
	int i;

	/*
	 * Make sure ubc_winshift is sane.
	 */
	if (ubc_winshift < PAGE_SHIFT)
		ubc_winshift = PAGE_SHIFT;

	/*
	 * init ubc_object.
	 * alloc and init ubc_map's.
	 * init inactive queues.
	 * alloc and init hashtable.
	 * map in ubc_object.
	 */

	simple_lock_init(&ubc_object.uobj.vmobjlock);
	ubc_object.uobj.pgops = &ubc_pager;
	TAILQ_INIT(&ubc_object.uobj.memq);
	ubc_object.uobj.uo_npages = 0;
	ubc_object.uobj.uo_refs = UVM_OBJ_KERN;

	ubc_object.umap = malloc(ubc_nwins * sizeof(struct ubc_map),
	    M_TEMP, M_NOWAIT);
	if (ubc_object.umap == NULL)
		panic("ubc_init: failed to allocate ubc_map");
	memset(ubc_object.umap, 0, ubc_nwins * sizeof(struct ubc_map));

	/*
	 * "va" is used both to discover the pmap's preferred alignment
	 * and, below, as the alignment argument to uvm_map().
	 */
	va = (vaddr_t)1L;
#ifdef PMAP_PREFER
	PMAP_PREFER(0, &va);
	ubc_nqueues = va >> ubc_winshift;
	if (ubc_nqueues == 0) {
		ubc_nqueues = 1;
	}
	if (ubc_nqueues != 1) {
		ubc_release_unmap = TRUE;
	}
#endif
	ubc_winsize = 1 << ubc_winshift;
	ubc_object.inactive = malloc(UBC_NQUEUES *
	    sizeof(struct ubc_inactive_head), M_TEMP, M_NOWAIT);
	if (ubc_object.inactive == NULL)
		panic("ubc_init: failed to allocate inactive queue heads");
	for (i = 0; i < UBC_NQUEUES; i++) {
		TAILQ_INIT(&ubc_object.inactive[i]);
	}
	for (i = 0; i < ubc_nwins; i++) {
		umap = &ubc_object.umap[i];
		TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
		    umap, inactive);
	}

	ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, M_TEMP, M_NOWAIT,
	    &ubc_object.hashmask);
	if (ubc_object.hash == NULL)
		panic("ubc_init: failed to allocate hash table");
	for (i = 0; i <= ubc_object.hashmask; i++) {
		LIST_INIT(&ubc_object.hash[i]);
	}

	if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva,
	    ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
		panic("ubc_init: failed to map ubc_object");
	}
	UVMHIST_INIT(ubchist, 300);
}

/*
 * ubc_fault: fault routine for ubc mapping
 */

int
ubc_fault(ufi, ign1, ign2, ign3, ign4, fault_type, access_type, flags)
	struct uvm_faultinfo *ufi;
	vaddr_t ign1;
	struct vm_page **ign2;
	int ign3, ign4;
	vm_fault_t fault_type;
	vm_prot_t access_type;
	int flags;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	struct ubc_map *umap;
	vaddr_t va, eva, ubc_offset, slot_offset;
	int i, error, npages;
	struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT], *pg;
	UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist);

	/*
	 * no need to try with PGO_LOCKED...
	 * we don't need to have the map locked since we know that
	 * no one will mess with it until our reference is released.
	 */

	if (flags & PGO_LOCKED) {
		uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL);
		flags &= ~PGO_LOCKED;
	}

	va = ufi->orig_rvaddr;
	ubc_offset = va - (vaddr_t)ubc_object.kva;

	UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx at %d",
	    va, ubc_offset, access_type, 0);

	umap = &ubc_object.umap[ubc_offset >> ubc_winshift];
	KASSERT(umap->refcount != 0);
	slot_offset = ubc_offset & (ubc_winsize - 1);

	/* no umap locking needed since we have a ref on the umap */
	uobj = umap->uobj;
	vp = (struct vnode *)uobj;
	KASSERT(vp != NULL);

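	/*
	 * Fault in at most the rest of the window, and never past the
	 * larger of EOF and the end of any pending overwrite, so that
	 * a write which extends the file can still fault in its pages.
	 */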
	npages = MIN(ubc_winsize - slot_offset,
	    (int)(round_page(MAX(vp->v_size, umap->offset +
	    umap->writeoff + umap->writelen)) -
	    umap->offset)) >> PAGE_SHIFT;

again:
	memset(pgs, 0, sizeof (pgs));
	simple_lock(&uobj->vmobjlock);

	UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x "
	    "v_size 0x%x", slot_offset, umap->writeoff, umap->writelen,
	    vp->v_size);
	UVMHIST_LOG(ubchist, "getpages vp %p offset 0x%x npages %d",
	    uobj, umap->offset + slot_offset, npages, 0);

	flags |= PGO_PASTEOF;
	error = VOP_GETPAGES(vp, umap->offset + slot_offset, pgs, &npages, 0,
	    access_type, 0, flags);
	UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages, 0, 0);

	if (error == EAGAIN) {
		tsleep(&lbolt, PVM, "ubc_fault", 0);
		goto again;
	}
	if (error) {
		return error;
	}

	va = ufi->orig_rvaddr;
	eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);

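	/*
	 * Enter a mapping for the faulting page and for any following
	 * pages that VOP_GETPAGES() returned, so nearby accesses within
	 * the window don't each take a separate fault.
	 */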
	UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0, 0);
	simple_lock(&uobj->vmobjlock);
	uvm_lock_pageq();
	for (i = 0; va < eva; i++, va += PAGE_SIZE) {
		UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i], 0, 0);
		pg = pgs[i];

		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		KASSERT((pg->flags & PG_FAKE) == 0);
		if (pg->flags & PG_RELEASED) {
			uvm_pagefree(pg);
			continue;
		}
		KASSERT(access_type == VM_PROT_READ ||
		    (pg->flags & PG_RDONLY) == 0);
		pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ | VM_PROT_WRITE, access_type);
		uvm_pageactivate(pg);
		pg->flags &= ~(PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
	}
	uvm_unlock_pageq();
	simple_unlock(&uobj->vmobjlock);
	pmap_update(ufi->orig_map->pmap);
	return 0;
}

/*
 * local functions
 */

struct ubc_map *
ubc_find_mapping(uobj, offset)
	struct uvm_object *uobj;
	voff_t offset;
{
	struct ubc_map *umap;

	LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
		if (umap->uobj == uobj && umap->offset == offset) {
			return umap;
		}
	}
	return NULL;
}

/*
 * ubc interface functions
 */
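
/*
 * A typical caller pattern, as a sketch only: the real read/write
 * loops live in the filesystem code, and the exact expression for the
 * vnode's uvm_object ("&vp->v_uvm.u_obj" here) is an assumption about
 * the caller, not something this file defines.
 *
 *	while (uio->uio_resid > 0 && uio->uio_offset < vp->v_size) {
 *		vsize_t bytelen = MIN(uio->uio_resid,
 *		    vp->v_size - uio->uio_offset);
 *		void *win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
 *		    &bytelen, UBC_READ);
 *		error = uiomove(win, bytelen, uio);
 *		ubc_release(win, 0);
 *		if (error)
 *			break;
 *	}
 */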

/*
 * ubc_alloc: allocate a file mapping window
 */

void *
ubc_alloc(uobj, offset, lenp, flags)
	struct uvm_object *uobj;
	voff_t offset;
	vsize_t *lenp;
	int flags;
{
	struct vnode *vp = (struct vnode *)uobj;
	vaddr_t slot_offset, va;
	struct ubc_map *umap;
	voff_t umap_offset;
	int error;
	UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x",
	    uobj, offset, *lenp, vp->v_size);

	umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
	slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
	*lenp = MIN(*lenp, ubc_winsize - slot_offset);
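
	/*
	 * For example, with 8KB windows (ubc_winshift == 13), offset
	 * 0x12345 splits into umap_offset 0x12000 (which window of the
	 * file) and slot_offset 0x345 (where within that window), and
	 * the usable length is capped at 0x2000 - 0x345 bytes.
	 */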

	/*
	 * the vnode is always locked here, so we don't need to add a ref.
	 */

again:
	simple_lock(&ubc_object.uobj.vmobjlock);
	umap = ubc_find_mapping(uobj, umap_offset);
	if (umap == NULL) {
		umap = TAILQ_FIRST(UBC_QUEUE(offset));
		if (umap == NULL) {
			simple_unlock(&ubc_object.uobj.vmobjlock);
			tsleep(&lbolt, PVM, "ubc_alloc", 0);
			goto again;
		}

		/*
		 * remove from old hash (if any), add to new hash.
		 */

		if (umap->uobj != NULL) {
			LIST_REMOVE(umap, hash);
		}
		umap->uobj = uobj;
		umap->offset = umap_offset;
		LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
		    umap, hash);
		va = UBC_UMAP_ADDR(umap);
		if (umap->flags & UMAP_MAPPING_CACHED) {
			umap->flags &= ~UMAP_MAPPING_CACHED;
			pmap_remove(pmap_kernel(), va, va + ubc_winsize);
			pmap_update(pmap_kernel());
		}
	} else {
		va = UBC_UMAP_ADDR(umap);
	}

	if (umap->refcount == 0) {
		TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
	}

#ifdef DIAGNOSTIC
	if ((flags & UBC_WRITE) && (umap->writeoff || umap->writelen)) {
		panic("ubc_alloc: concurrent writes vp %p", uobj);
	}
#endif
	if (flags & UBC_WRITE) {
		umap->writeoff = slot_offset;
		umap->writelen = *lenp;
	}

	umap->refcount++;
	simple_unlock(&ubc_object.uobj.vmobjlock);
	UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x",
	    umap, umap->refcount, va, flags);

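	/*
	 * With UBC_FAULTBUSY, the window's pages are brought in
	 * synchronously here and left busy and mapped, so the caller
	 * can overwrite them without taking faults; ubc_release()
	 * unbusies them again (see the UMAP_PAGES_LOCKED path there).
	 */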
	if (flags & UBC_FAULTBUSY) {
		int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT;
		struct vm_page *pgs[npages];
		int gpflags = PGO_SYNCIO|PGO_OVERWRITE|PGO_PASTEOF;
		int i;

		if (umap->flags & UMAP_MAPPING_CACHED) {
			umap->flags &= ~UMAP_MAPPING_CACHED;
			pmap_remove(pmap_kernel(), va, va + ubc_winsize);
		}
		simple_lock(&uobj->vmobjlock);
		error = VOP_GETPAGES(vp, trunc_page(offset), pgs, &npages, 0,
		    VM_PROT_READ|VM_PROT_WRITE, 0, gpflags);
		UVMHIST_LOG(ubchist, "faultbusy getpages %d", error, 0, 0, 0);
		if (error) {
			goto out;
		}
		for (i = 0; i < npages; i++) {
			pmap_kenter_pa(va + slot_offset + (i << PAGE_SHIFT),
			    VM_PAGE_TO_PHYS(pgs[i]),
			    VM_PROT_READ | VM_PROT_WRITE);
		}
		pmap_update(pmap_kernel());
		umap->flags |= UMAP_PAGES_LOCKED;
	}

out:
	return (void *)(va + slot_offset);
}

/*
 * ubc_release: free a file mapping window.
 */

void
ubc_release(va, flags)
	void *va;
	int flags;
{
	struct ubc_map *umap;
	struct uvm_object *uobj;
	vaddr_t umapva;
	boolean_t unmapped;
	UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "va %p", va, 0, 0, 0);
	umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
	umapva = UBC_UMAP_ADDR(umap);
	uobj = umap->uobj;
	KASSERT(uobj != NULL);

	if (umap->flags & UMAP_PAGES_LOCKED) {
		int slot_offset = umap->writeoff;
		int endoff = umap->writeoff + umap->writelen;
		int zerolen = round_page(endoff) - endoff;
		int npages = (int)(round_page(umap->writeoff + umap->writelen)
		    - trunc_page(umap->writeoff)) >> PAGE_SHIFT;
		struct vm_page *pgs[npages];
		paddr_t pa;
		int i;
		boolean_t rv;

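		/*
		 * Zero the tail of the last written page, from the end
		 * of the write up to the page boundary, so no stale
		 * data is left visible past the end of the write.
		 */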
		if (zerolen) {
			memset((char *)umapva + endoff, 0, zerolen);
		}
		umap->flags &= ~UMAP_PAGES_LOCKED;
		uvm_lock_pageq();
		for (i = 0; i < npages; i++) {
			rv = pmap_extract(pmap_kernel(),
			    umapva + slot_offset + (i << PAGE_SHIFT), &pa);
			KASSERT(rv);
			pgs[i] = PHYS_TO_VM_PAGE(pa);
			pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN);
			uvm_pageactivate(pgs[i]);
		}
		uvm_unlock_pageq();
		pmap_kremove(umapva, ubc_winsize);
		pmap_update(pmap_kernel());
		uvm_page_unbusy(pgs, npages);
		unmapped = TRUE;
	} else {
		unmapped = FALSE;
	}

	simple_lock(&ubc_object.uobj.vmobjlock);
	umap->writeoff = 0;
	umap->writelen = 0;
	umap->refcount--;
	if (umap->refcount == 0) {
		if (UBC_RELEASE_UNMAP &&
		    (((struct vnode *)uobj)->v_flag & VTEXT)) {

			/*
			 * if this file is the executable image of
			 * some process, that process will likely have
			 * the file mapped at an alignment other than
			 * what PMAP_PREFER() would like.  we'd like
			 * to have process text be able to use the
			 * cache even if someone is also reading the
			 * file, so invalidate mappings of such files
			 * as soon as possible.
			 */

			pmap_remove(pmap_kernel(), umapva,
			    umapva + ubc_winsize);
			umap->flags &= ~UMAP_MAPPING_CACHED;
			pmap_update(pmap_kernel());
			LIST_REMOVE(umap, hash);
			umap->uobj = NULL;
			TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
			    inactive);
		} else {
			if (!unmapped) {
				umap->flags |= UMAP_MAPPING_CACHED;
			}
			TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
			    inactive);
		}
	}
	UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount, 0, 0);
	simple_unlock(&ubc_object.uobj.vmobjlock);
}

/*
 * ubc_flush: remove any cached mappings for the given range of the
 * object.  an "end" of 0 means "flush to the end of the object".
 */

void
ubc_flush(uobj, start, end)
	struct uvm_object *uobj;
	voff_t start, end;
{
	struct ubc_map *umap;
	vaddr_t va;
	UVMHIST_FUNC("ubc_flush"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx",
	    uobj, start, end, 0);

	simple_lock(&ubc_object.uobj.vmobjlock);
	for (umap = ubc_object.umap;
	     umap < &ubc_object.umap[ubc_nwins];
	     umap++) {

		if (umap->uobj != uobj || umap->offset < start ||
		    (umap->offset >= end && end != 0) ||
		    umap->refcount > 0) {
			continue;
		}

		/*
		 * remove from hash,
		 * move to head of inactive queue.
		 */

		va = (vaddr_t)(ubc_object.kva +
		    ((umap - ubc_object.umap) << ubc_winshift));
		pmap_remove(pmap_kernel(), va, va + ubc_winsize);

		LIST_REMOVE(umap, hash);
		umap->uobj = NULL;
		TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive);
		TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive);
	}
	pmap_update(pmap_kernel());
	simple_unlock(&ubc_object.uobj.vmobjlock);
}