/*	$NetBSD: genfs_vnops.c,v 1.41 2001/11/30 15:18:39 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.41 2001/11/30 15:18:39 christos Exp $");

#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/mman.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pager.h>

#ifdef NFSSERVER
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>
#endif

#define MAX_READ_AHEAD	16	/* XXXUBC 16 */

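/*
 * Generic poll: report the vnode as ready for any requested
 * non-blocking read or write.
 */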
int
genfs_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

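/*
 * Generic fsync: flush the vnode's dirty buffers and, unless only the
 * data is wanted (FSYNC_DATAONLY), push the updated inode to disk
 * via VOP_UPDATE().
 */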
int
genfs_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;

	wait = (ap->a_flags & FSYNC_WAIT) != 0;
	vflushbuf(vp, wait);
	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
		return (0);
	else
		return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
}

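/*
 * Generic seek check: a seek is acceptable as long as the new offset
 * is not negative.
 */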
int
genfs_seek(v)
	void *v;
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		struct ucred *a_ucred;
	} */ *ap = v;

	if (ap->a_newoff < 0)
		return (EINVAL);

	return (0);
}

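/*
 * Generic abortop: free the pathname buffer left over from lookup,
 * unless the caller still intends to use it (SAVESTART).
 */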
int
genfs_abortop(v)
	void *v;
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		PNBUF_PUT(ap->a_cnp->cn_pnbuf);
	return (0);
}

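/*
 * Generic fcntl: accept F_SETFL as a no-op and reject everything else.
 */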
int
genfs_fcntl(v)
	void *v;
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;

	if (ap->a_command == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
}

/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{

	return (0);
}

/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{

	return (EINVAL);
}

/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{

	return (EOPNOTSUPP);
}

/*
 * Called when an fs doesn't support a particular vop but the vop needs to
 * vrele, vput, or vunlock passed in vnodes.
 */
int
genfs_eopnotsupp_rele(v)
	void *v;
{
	struct vop_generic_args /*
		struct vnodeop_desc *a_desc;
		/ * other random data follows, presumably * /
	} */ *ap = v;
	struct vnodeop_desc *desc = ap->a_desc;
	struct vnode *vp;
	int flags, i, j, offset;

	flags = desc->vdesc_flags;
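	/*
	 * flags is shifted right once per vnode below, so that the
	 * VDESC_VP0_* masks select the WILLPUT/WILLUNLOCK/WILLRELE
	 * bits of the vnode currently being examined.
	 */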
	for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
		if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
			break;	/* stop at end of list */
		if ((j = flags & VDESC_VP0_WILLPUT)) {
			vp = *VOPARG_OFFSETTO(struct vnode**,offset,ap);
			switch (j) {
			case VDESC_VP0_WILLPUT:
				vput(vp);
				break;
			case VDESC_VP0_WILLUNLOCK:
				VOP_UNLOCK(vp, 0);
				break;
			case VDESC_VP0_WILLRELE:
				vrele(vp);
				break;
			}
		}
	}

	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{

	return (EBADF);
}

/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{

	return (ENOTTY);
}


/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
genfs_revoke(v)
	void *v;
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("genfs_revoke: not revokeall");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Lock the node.
 */
int
genfs_lock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
}

/*
 * Unlock the node.
 */
int
genfs_unlock(v)
	void *v;
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
	    &vp->v_interlock));
}

/*
 * Return whether or not the node is locked.
 */
int
genfs_islocked(v)
	void *v;
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockstatus(&vp->v_lock));
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 */
int
genfs_nolock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
}

int
genfs_nounlock(v)
	void *v;
{
	return (0);
}

int
genfs_noislocked(v)
	void *v;
{
	return (0);
}

/*
 * Local lease check for NFS servers. Just set up args and let
 * nqsrv_getlease() do the rest. If NFSSERVER is not in the kernel,
 * this is a null operation.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
	return (0);
#else
	return (0);
#endif /* NFSSERVER */
}

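/*
 * Generic mmap: there is nothing for the fs to do here, so just
 * allow the mapping.
 */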
int
genfs_mmap(v)
	void *v;
{
	return 0;
}

/*
 * generic VM getpages routine.
 * Return PG_BUSY pages for the given range,
 * reading from backing store if necessary.
 */

int
genfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t newsize, diskeof, memeof;
	off_t offset, origoffset, startoffset, endoffset, raoffset;
	daddr_t lbn, blkno;
	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
	int fs_bshift, fs_bsize, dev_bshift;
	int flags = ap->a_flags;
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	vaddr_t kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct vnode *devvp;
	struct genfs_node *gp = VTOG(vp);
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, *pgs[MAX_READ_AHEAD];
	struct ucred *cred = curproc->p_ucred;	/* XXXUBC curproc */
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t sawhole = FALSE;
	boolean_t overwrite = (flags & PGO_OVERWRITE) != 0;
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
	    vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);

	/* XXXUBC temp limit */
	if (*ap->a_count > MAX_READ_AHEAD) {
		panic("genfs_getpages: too many pages");
	}

	error = 0;
	origoffset = ap->a_offset;
	orignpages = *ap->a_count;
	GOP_SIZE(vp, vp->v_size, &diskeof);
	if (flags & PGO_PASTEOF) {
		newsize = MAX(vp->v_size,
		    origoffset + (orignpages << PAGE_SHIFT));
		GOP_SIZE(vp, newsize, &memeof);
	} else {
		memeof = diskeof;
	}
	KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
	KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
	KASSERT(orignpages > 0);

	/*
	 * Bounds-check the request.
	 */

	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
		    origoffset, *ap->a_count, memeof,0);
		return EINVAL;
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);

		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	if (write && (vp->v_flag & VONWORKLST) == 0) {
		vn_syncer_add_to_worklist(vp, filedelay);
	}

	/*
	 * find the requested pages and make some simple checks.
	 * leave space in the page array for a whole block.
	 */

	if (vp->v_type == VREG) {
		fs_bshift = vp->v_mount->mnt_fs_bshift;
		dev_bshift = vp->v_mount->mnt_dev_bshift;
	} else {
		fs_bshift = DEV_BSHIFT;
		dev_bshift = DEV_BSHIFT;
	}
	fs_bsize = 1 << fs_bshift;

	orignpages = MIN(orignpages,
	    round_page(memeof - origoffset) >> PAGE_SHIFT);
	npages = orignpages;
	startoffset = origoffset & ~(fs_bsize - 1);
	endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
	    + fs_bsize - 1) & ~(fs_bsize - 1));
	endoffset = MIN(endoffset, round_page(memeof));
	ridx = (origoffset - startoffset) >> PAGE_SHIFT;

	memset(pgs, 0, sizeof(pgs));
	uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);

	/*
	 * if the pages are already resident, just return them.
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) ||
		    (write && (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
		raoffset = origoffset + (orignpages << PAGE_SHIFT);
		npages += ridx;
		goto raout;
	}

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the pages.
	 */

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			pg->flags &= ~(PG_RDONLY|PG_CLEAN);
		}
		npages += ridx;
		goto out;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */

	npages = (endoffset - startoffset) >> PAGE_SHIFT;
	if (startoffset != origoffset || npages != orignpages) {

		/*
		 * we need to avoid deadlocks caused by locking
		 * additional pages at lower offsets than pages we
		 * already have locked. unlock them all and start over.
		 */

		for (i = 0; i < orignpages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				pg->flags |= PG_RELEASED;
			}
		}
		uvm_page_unbusy(&pgs[ridx], orignpages);
		memset(pgs, 0, sizeof(pgs));

		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    startoffset, endoffset, 0,0);
		npgs = npages;
		uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    UVMPAGER_MAPIN_READ);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
	mbp->b_iodone = (async ? uvm_aio_biodone : 0);
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	/*
	 * if EOF is in the middle of the range, zero the part past EOF.
	 * if the page including EOF is not PG_FAKE, skip over it since
	 * in that case it has valid data that we need to preserve.
	 */

	if (tailbytes > 0) {
		size_t tailstart = bytes;

		if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
			tailstart = round_page(tailstart);
			tailbytes -= tailstart - bytes;
		}
		UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
		    kva, tailstart, tailbytes,0);
		memset((void *)(kva + tailstart), 0, tailbytes);
	}

	/*
	 * now loop over the pages, reading as needed.
	 */

	if (write) {
		lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
	} else {
		lockmgr(&gp->g_glock, LK_SHARED, NULL);
	}

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) {
			size_t b;

			KASSERT((offset & (PAGE_SIZE - 1)) == 0);
			b = MIN(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    offset, 0,0,0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o. if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn, error,0,0);
			skipbytes += bytes;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of reading it.
		 * if this is a read access, mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno < 0) {
			int holepages = (round_page(offset + iobytes) -
			    trunc_page(offset)) >> PAGE_SHIFT;
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);

			sawhole = TRUE;
			memset((char *)kva + (offset - startoffset), 0,
			    iobytes);
			skipbytes += iobytes;

			for (i = 0; i < holepages; i++) {
				if (write) {
					pgs[pidx + i]->flags &= ~PG_CLEAN;
				} else {
					pgs[pidx + i]->flags |= PG_RDONLY;
				}
			}
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva + offset - startoffset;
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			bp->b_proc = NULL;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;
		if (devvp->v_type == VBLK) {
			bp->b_dev = devvp->v_rdev;
		}

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}

loopdone:
	if (skipbytes) {
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}

	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
		lockmgr(&gp->g_glock, LK_RELEASE, NULL);
		return 0;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	raoffset = startoffset + totalbytes;

	/*
	 * if we encountered a hole then we have to do a little more work.
	 * for read faults, we marked the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated while the pages are locked.
	 */

	if (!error && sawhole && write) {
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			pgs[i]->flags &= ~PG_CLEAN;
			UVMHIST_LOG(ubchist, "mark dirty pg %p", pgs[i],0,0,0);
		}
		error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
		    cred);
		UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
		    startoffset, npages << PAGE_SHIFT, error,0);
	}
	lockmgr(&gp->g_glock, LK_RELEASE, NULL);
	simple_lock(&uobj->vmobjlock);

	/*
	 * see if we want to start any readahead.
	 * XXXUBC for now, just read the next 128k on 64k boundaries.
	 * this is pretty nonsensical, but it is 50% faster than reading
	 * just the next 64k.
	 */

raout:
	if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
	    PAGE_SHIFT <= 16) {
		off_t rasize;
		int racount;

		/* XXXUBC temp limit, from above */
		racount = MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD);
		rasize = racount << PAGE_SHIFT;
		(void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);

		/* XXXUBC temp limit, from above */
		racount = MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD);
		(void) VOP_GETPAGES(vp, raoffset + rasize, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);
	}

	/*
	 * we're almost done! release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy pages that were not actually requested.
	 */

	if (error) {
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0,0);
			if (pgs[i]->flags & PG_FAKE) {
				pgs[i]->flags |= PG_RELEASED;
			}
		}
		uvm_lock_pageq();
		uvm_page_unbusy(pgs, npages);
		uvm_unlock_pageq();
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
		return error;
	}

out:
	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
	uvm_lock_pageq();
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (pg == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pg, pg->flags, 0,0);
		if (pg->flags & PG_FAKE && !overwrite) {
			pg->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
		}
		if (write) {
			pg->flags &= ~(PG_RDONLY);
		}
		if (i < ridx || i >= ridx + orignpages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pg, pg->offset,0,0);
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_FAKE) {
				KASSERT(overwrite);
				uvm_pagezero(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
				continue;
			}
			uvm_pageactivate(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
	uvm_unlock_pageq();
	simple_unlock(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    orignpages * sizeof(struct vm_page *));
	}
	return 0;
}

/*
 * generic VM putpages routine.
 * Write the given range of pages to backing store.
 *
 * => "offhi == 0" means flush all pages at or after "offlo".
 * => object should be locked by caller. we may _unlock_ the object
 *	if (and only if) we need to clean a page (PGO_CLEANIT), or
 *	if PGO_SYNCIO is set and there are pages busy.
 *	we return with the object locked.
 * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
 *	thus, a caller might want to unlock higher level resources
 *	(e.g. vm_map) before calling flush.
 * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
 *	unlock the object nor block.
 * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
 * => NOTE: we rely on the fact that the object's memq is a TAILQ and
 *	that new pages are inserted on the tail end of the list. thus,
 *	we can make a complete pass through the object in one go by starting
 *	at the head and working towards the tail (new pages are put in
 *	front of us).
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the page queue lock.
 *
 * note on "cleaning" object and PG_BUSY pages:
 *	this routine is holding the lock on the object. the only time
 *	that it can run into a PG_BUSY page that it does not own is if
 *	some other process has started I/O on the page (e.g. either
 *	a pagein, or a pageout). if the PG_BUSY page is being paged
 *	in, then it can not be dirty (!PG_CLEAN) because no one has
 *	had a chance to modify it yet. if the PG_BUSY page is being
 *	paged out then it means that someone else has already started
 *	cleaning the page for us (how nice!). in this case, if we
 *	have syncio specified, then after we make our pass through the
 *	object we need to wait for the other PG_BUSY pages to clear
 *	off (i.e. we need to do an iosync). also note that once a
 *	page is PG_BUSY it must stay in its object until it is un-busyed.
 *
 * note on page traversal:
 *	we can traverse the pages in an object either by going down the
 *	linked list in "uobj->memq", or we can go over the address range
 *	by page doing hash table lookups for each address. depending
 *	on how many pages are in the object it may be cheaper to do one
 *	or the other. we set "by_list" to true if we are using memq.
 *	if the cost of a hash lookup was equal to the cost of the list
 *	traversal we could compare the number of pages in the start->stop
 *	range to the total number of pages in the object. however, it
 *	seems that a hash table lookup is more expensive than the linked
 *	list traversal, so we multiply the number of pages in the
 *	range by an estimate of the relatively higher cost of the hash lookup.
 */

int
genfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	off_t startoff = ap->a_offlo;
	off_t endoff = ap->a_offhi;
	off_t off;
	int flags = ap->a_flags;
	int n = MAXBSIZE >> PAGE_SHIFT;
	int i, s, error, npages, nback;
	int freeflag;
	struct vm_page *pgs[n], *pg, *nextpg, *tpg, curmp, endmp;
	boolean_t wasclean, by_list, needs_clean;
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);

	KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
	KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
	KASSERT(startoff < endoff || endoff == 0);

	UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
	    vp, uobj->uo_npages, startoff, endoff - startoff);
	if (uobj->uo_npages == 0) {
		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
		    (vp->v_flag & VONWORKLST)) {
			vp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(vp, v_synclist);
		}
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * the vnode has pages, set up to process the request.
	 */

	error = 0;
	wasclean = TRUE;
	off = startoff;
	if (endoff == 0 || flags & PGO_ALLPAGES) {
		endoff = trunc_page(LLONG_MAX);
	}
	by_list = (uobj->uo_npages <=
	    ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);

	/*
	 * start the loop. when scanning by list, hold the last page
	 * in the list before we start. pages allocated after we start
	 * will be added to the end of the list, so we can stop at the
	 * current last page.
	 */

	freeflag = (curproc == uvm.pagedaemon_proc) ? PG_PAGEOUT : PG_RELEASED;
	curmp.uobject = uobj;
	curmp.offset = (voff_t)-1;
	curmp.flags = PG_BUSY;
	endmp.uobject = uobj;
	endmp.offset = (voff_t)-1;
	endmp.flags = PG_BUSY;
	if (by_list) {
		pg = TAILQ_FIRST(&uobj->memq);
		TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
		PHOLD(curproc);
	} else {
		pg = uvm_pagelookup(uobj, off);
	}
	nextpg = NULL;
	while (by_list || off < endoff) {

		/*
		 * if the current page is not interesting, move on to the next.
		 */

		KASSERT(pg == NULL || pg->uobject == uobj);
		KASSERT(pg == NULL ||
		    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
		    (pg->flags & PG_BUSY) != 0);
		if (by_list) {
			if (pg == &endmp) {
				break;
			}
			if (pg->offset < startoff || pg->offset >= endoff ||
			    pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
				pg = TAILQ_NEXT(pg, listq);
				continue;
			}
			off = pg->offset;
		} else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
			off += PAGE_SIZE;
			if (off < endoff) {
				pg = uvm_pagelookup(uobj, off);
			}
			continue;
		}

		/*
		 * if the current page needs to be cleaned and it's busy,
		 * wait for it to become unbusy.
		 */

		if (flags & PGO_FREE) {
			pmap_page_protect(pg, VM_PROT_NONE);
		}
		if (flags & PGO_CLEANIT) {
			needs_clean = pmap_clear_modify(pg) ||
			    (pg->flags & PG_CLEAN) == 0;
			pg->flags |= PG_CLEAN;
		} else {
			needs_clean = FALSE;
		}
		if (needs_clean && pg->flags & PG_BUSY) {
			KASSERT(curproc != uvm.pagedaemon_proc);
			UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
			if (by_list) {
				TAILQ_INSERT_BEFORE(pg, &curmp, listq);
				UVMHIST_LOG(ubchist, "curmp next %p",
				    TAILQ_NEXT(&curmp, listq), 0,0,0);
			}
			pg->flags |= PG_WANTED;
			pg->flags &= ~PG_CLEAN;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "genput", 0);
			simple_lock(&uobj->vmobjlock);
			if (by_list) {
				UVMHIST_LOG(ubchist, "after next %p",
				    TAILQ_NEXT(&curmp, listq), 0,0,0);
				pg = TAILQ_NEXT(&curmp, listq);
				TAILQ_REMOVE(&uobj->memq, &curmp, listq);
			} else {
				pg = uvm_pagelookup(uobj, off);
			}
			continue;
		}

		/*
		 * if we're cleaning, build a cluster.
		 * the cluster will consist of pages which are currently dirty,
		 * but they will be returned to us marked clean.
		 * if not cleaning, just operate on the one page.
		 */

		if (needs_clean) {
			wasclean = FALSE;
			memset(pgs, 0, sizeof(pgs));
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "genfs_putpages");

			/*
			 * first look backward.
			 */

			npages = MIN(n >> 1, off >> PAGE_SHIFT);
			nback = npages;
			uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
			if (nback) {
				memmove(&pgs[0], &pgs[npages - nback],
				    nback * sizeof(pgs[0]));
			}
			n -= nback;

			/*
			 * then plug in our page of interest.
			 */

			pgs[nback] = pg;

			/*
			 * then look forward to fill in the remaining space in
			 * the array of pages.
			 */

			npages = MIN(n, (endoff - off) >> PAGE_SHIFT) - 1;
			uvn_findpages(uobj, off + PAGE_SIZE, &npages,
			    &pgs[nback + 1],
			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
			npages += nback + 1;
		} else {
			pgs[0] = pg;
			npages = 1;
		}

		/*
		 * apply FREE or DEACTIVATE options if requested.
		 */

		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
			uvm_lock_pageq();
		}
		for (i = 0; i < npages; i++) {
			tpg = pgs[i];
			KASSERT(tpg->uobject == uobj);
			if (flags & PGO_DEACTIVATE &&
			    (tpg->pqflags & PQ_INACTIVE) == 0 &&
			    tpg->wire_count == 0) {
				(void) pmap_clear_reference(tpg);
				uvm_pagedeactivate(tpg);
			} else if (flags & PGO_FREE) {
				pmap_page_protect(tpg, VM_PROT_NONE);
				if (tpg->flags & PG_BUSY) {
					tpg->flags |= freeflag;
					if (freeflag == PG_PAGEOUT) {
						uvmexp.paging++;
						uvm_pagedequeue(tpg);
					}
				} else {
					nextpg = TAILQ_NEXT(tpg, listq);
					uvm_pagefree(tpg);
				}
			}
		}
		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
			uvm_unlock_pageq();
		}
		if (needs_clean) {

			/*
			 * start the i/o. if we're traversing by list,
			 * keep our place in the list with a marker page.
			 */

			if (by_list) {
				TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
				    listq);
			}
			simple_unlock(&uobj->vmobjlock);
			error = GOP_WRITE(vp, pgs, npages, flags);
			simple_lock(&uobj->vmobjlock);
			if (by_list) {
				pg = TAILQ_NEXT(&curmp, listq);
				TAILQ_REMOVE(&uobj->memq, &curmp, listq);
			}
			if (error == ENOMEM) {
				for (i = 0; i < npages; i++) {
					tpg = pgs[i];
					if (tpg->flags & PG_PAGEOUT) {
						tpg->flags &= ~PG_PAGEOUT;
						uvmexp.paging--;
					}
					tpg->flags &= ~PG_CLEAN;
					uvm_pageactivate(tpg);
				}
				uvm_page_unbusy(pgs, npages);
			}
			if (error) {
				break;
			}
			if (by_list) {
				continue;
			}
		}

		/*
		 * find the next page and continue if there was no error.
		 */

		if (by_list) {
			if (nextpg) {
				pg = nextpg;
				nextpg = NULL;
			} else {
				pg = TAILQ_NEXT(pg, listq);
			}
		} else {
			off += PAGE_SIZE;
			if (off < endoff) {
				pg = uvm_pagelookup(uobj, off);
			}
		}
	}
	if (by_list) {
		TAILQ_REMOVE(&uobj->memq, &endmp, listq);
		PRELE(curproc);
	}

	/*
	 * if we're cleaning and there was nothing to clean,
	 * take us off the syncer list. if we started any i/o
	 * and we're doing sync i/o, wait for all writes to finish.
	 */

	if ((flags & PGO_CLEANIT) && wasclean &&
	    startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
	    (vp->v_flag & VONWORKLST)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	if (!wasclean && !async) {
		s = splbio();
		while (vp->v_numoutput != 0) {
			vp->v_flag |= VBWAIT;
			UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, &uobj->vmobjlock,
			    FALSE, "genput2",0);
			simple_lock(&uobj->vmobjlock);
		}
		splx(s);
	}
	simple_unlock(&uobj->vmobjlock);
	return error;
}

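/*
 * Generic gop_write, used by genfs_putpages() through GOP_WRITE():
 * map the pages into kernel virtual memory, locate the underlying
 * disk blocks with VOP_BMAP() and issue the write(s), using a
 * sub-buffer for each piece that is not contiguous on disk.
 */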
int
genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
{
	int s, error, run;
	int fs_bshift, dev_bshift;
	vaddr_t kva;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *devvp;
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
	    vp, pgs, npages, flags);

	GOP_SIZE(vp, vp->v_size, &eof);
	if (vp->v_type == VREG) {
		fs_bshift = vp->v_mount->mnt_fs_bshift;
		dev_bshift = vp->v_mount->mnt_dev_bshift;
	} else {
		fs_bshift = DEV_BSHIFT;
		dev_bshift = DEV_BSHIFT;
	}
	error = 0;
	pg = pgs[0];
	startoffset = pg->offset;
	bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;
	KASSERT(bytes != 0);

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
	    UVMPAGER_MAPIN_WAITOK);

	s = splbio();
	vp->v_numoutput += 2;
	mbp = pool_get(&bufpool, PR_WAITOK);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	splx(s);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? B_CALL : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (blkno == (daddr_t)-1) {
			skipbytes += iobytes;
			continue;
		}

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			splx(s);
			bp->b_data = (char *)kva +
			    (vaddr_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;
		if (devvp->v_type == VBLK) {
			bp->b_dev = devvp->v_rdev;
		}

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);
		UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    vp, offset, bp->b_bcount, bp->b_blkno);
		VOP_STRATEGY(bp);
	}
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
		return 0;
	}
	UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
	error = biowait(mbp);
	uvm_aio_aiodone(mbp);
	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
	return error;
}

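/*
 * Initialize the genfs part of a vnode: set up the glock and
 * record the filesystem's genfs_ops.
 */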
void
genfs_node_init(struct vnode *vp, struct genfs_ops *ops)
{
	struct genfs_node *gp = VTOG(vp);

	lockinit(&gp->g_glock, PINOD, "glock", 0, 0);
	gp->g_op = ops;
}

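/*
 * Generic gop_size: round the given size up to the end of the
 * filesystem block containing it.
 */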
void
genfs_size(struct vnode *vp, off_t size, off_t *eobp)
{
	int bsize;

	bsize = 1 << vp->v_mount->mnt_fs_bshift;
	*eobp = (size + bsize - 1) & ~(bsize - 1);
}