1 /* $NetBSD: genfs_vnops.c,v 1.51 2002/03/17 19:41:09 atatat Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.51 2002/03/17 19:41:09 atatat Exp $");
39
40 #include "opt_nfsserver.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/mount.h>
47 #include <sys/namei.h>
48 #include <sys/vnode.h>
49 #include <sys/fcntl.h>
50 #include <sys/malloc.h>
51 #include <sys/poll.h>
52 #include <sys/mman.h>
53
54 #include <miscfs/genfs/genfs.h>
55 #include <miscfs/genfs/genfs_node.h>
56 #include <miscfs/specfs/specdev.h>
57
58 #include <uvm/uvm.h>
59 #include <uvm/uvm_pager.h>
60
61 #ifdef NFSSERVER
62 #include <nfs/rpcv2.h>
63 #include <nfs/nfsproto.h>
64 #include <nfs/nfs.h>
65 #include <nfs/nqnfs.h>
66 #include <nfs/nfs_var.h>
67 #endif
68
69 #define MAX_READ_AHEAD 16 /* XXXUBC 16 */
70
71 int
72 genfs_poll(v)
73 void *v;
74 {
75 struct vop_poll_args /* {
76 struct vnode *a_vp;
77 int a_events;
78 struct proc *a_p;
79 } */ *ap = v;
80
81 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
82 }
83
84 int
85 genfs_fsync(v)
86 void *v;
87 {
88 struct vop_fsync_args /* {
89 struct vnode *a_vp;
90 struct ucred *a_cred;
91 int a_flags;
92 off_t a_offlo;
93 off_t a_offhi;
94 struct proc *a_p;
95 } */ *ap = v;
96 struct vnode *vp = ap->a_vp;
97 int wait;
98
99 wait = (ap->a_flags & FSYNC_WAIT) != 0;
100 vflushbuf(vp, wait);
101 if ((ap->a_flags & FSYNC_DATAONLY) != 0)
102 return (0);
103 else
104 return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
105 }
106
107 int
108 genfs_seek(v)
109 void *v;
110 {
111 struct vop_seek_args /* {
112 struct vnode *a_vp;
113 off_t a_oldoff;
114 off_t a_newoff;
115 struct ucred *a_ucred;
116 } */ *ap = v;
117
118 if (ap->a_newoff < 0)
119 return (EINVAL);
120
121 return (0);
122 }
123
124 int
125 genfs_abortop(v)
126 void *v;
127 {
128 struct vop_abortop_args /* {
129 struct vnode *a_dvp;
130 struct componentname *a_cnp;
131 } */ *ap = v;
132
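 /*
  * Free the pathname buffer only if namei() left one allocated (HASBUF)
  * and the caller has not asked to keep it around for a later lookup
  * step (SAVESTART).
  */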
133 if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
134 PNBUF_PUT(ap->a_cnp->cn_pnbuf);
135 return (0);
136 }
137
138 int
139 genfs_fcntl(v)
140 void *v;
141 {
142 struct vop_fcntl_args /* {
143 struct vnode *a_vp;
144 u_int a_command;
145 caddr_t a_data;
146 int a_fflag;
147 struct ucred *a_cred;
148 struct proc *a_p;
149 } */ *ap = v;
150
151 if (ap->a_command == F_SETFL)
152 return (0);
153 else
154 return (EOPNOTSUPP);
155 }
156
157 /*ARGSUSED*/
158 int
159 genfs_badop(v)
160 void *v;
161 {
162
163 panic("genfs: bad op");
164 }
165
166 /*ARGSUSED*/
167 int
168 genfs_nullop(v)
169 void *v;
170 {
171
172 return (0);
173 }
174
175 /*ARGSUSED*/
176 int
177 genfs_einval(v)
178 void *v;
179 {
180
181 return (EINVAL);
182 }
183
184 /*ARGSUSED*/
185 int
186 genfs_eopnotsupp(v)
187 void *v;
188 {
189
190 return (EOPNOTSUPP);
191 }
192
193 /*
194 * Called when an fs doesn't support a particular vop but the vop needs to
195 * vrele, vput, or vunlock passed in vnodes.
196 */
197 int
198 genfs_eopnotsupp_rele(v)
199 void *v;
200 {
201 struct vop_generic_args /*
202 struct vnodeop_desc *a_desc;
203 / * other random data follows, presumably * /
204 } */ *ap = v;
205 struct vnodeop_desc *desc = ap->a_desc;
206 struct vnode *vp;
207 int flags, i, j, offset;
208
209 flags = desc->vdesc_flags;
210 for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
211 if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
212 break; /* stop at end of list */
213 if ((j = flags & VDESC_VP0_WILLPUT)) {
214 vp = *VOPARG_OFFSETTO(struct vnode**,offset,ap);
215 switch (j) {
216 case VDESC_VP0_WILLPUT:
217 vput(vp);
218 break;
219 case VDESC_VP0_WILLUNLOCK:
220 VOP_UNLOCK(vp, 0);
221 break;
222 case VDESC_VP0_WILLRELE:
223 vrele(vp);
224 break;
225 }
226 }
227 }
228
229 return (EOPNOTSUPP);
230 }
231
232 /*ARGSUSED*/
233 int
234 genfs_ebadf(v)
235 void *v;
236 {
237
238 return (EBADF);
239 }
240
241 /* ARGSUSED */
242 int
243 genfs_enoioctl(v)
244 void *v;
245 {
246
247 return (EPASSTHROUGH);
248 }
249
250
251 /*
252 * Eliminate all activity associated with the requested vnode
253 * and with all vnodes aliased to the requested vnode.
254 */
255 int
256 genfs_revoke(v)
257 void *v;
258 {
259 struct vop_revoke_args /* {
260 struct vnode *a_vp;
261 int a_flags;
262 } */ *ap = v;
263 struct vnode *vp, *vq;
264 struct proc *p = curproc; /* XXX */
265
266 #ifdef DIAGNOSTIC
267 if ((ap->a_flags & REVOKEALL) == 0)
268 panic("genfs_revoke: not revokeall");
269 #endif
270
271 vp = ap->a_vp;
272 simple_lock(&vp->v_interlock);
273
274 if (vp->v_flag & VALIASED) {
275 /*
276 * If a vgone (or vclean) is already in progress,
277 * wait until it is done and return.
278 */
279 if (vp->v_flag & VXLOCK) {
280 vp->v_flag |= VXWANT;
281 simple_unlock(&vp->v_interlock);
282 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
283 return (0);
284 }
285 /*
286 * Ensure that vp will not be vgone'd while we
287 * are eliminating its aliases.
288 */
289 vp->v_flag |= VXLOCK;
290 simple_unlock(&vp->v_interlock);
291 while (vp->v_flag & VALIASED) {
292 simple_lock(&spechash_slock);
293 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
294 if (vq->v_rdev != vp->v_rdev ||
295 vq->v_type != vp->v_type || vp == vq)
296 continue;
297 simple_unlock(&spechash_slock);
298 vgone(vq);
299 break;
300 }
301 if (vq == NULLVP)
302 simple_unlock(&spechash_slock);
303 }
304 /*
305 * Remove the lock so that vgone below will
306 * really eliminate the vnode after which time
307 * vgone will awaken any sleepers.
308 */
309 simple_lock(&vp->v_interlock);
310 vp->v_flag &= ~VXLOCK;
311 }
312 vgonel(vp, p);
313 return (0);
314 }
315
316 /*
317 * Lock the node.
318 */
319 int
320 genfs_lock(v)
321 void *v;
322 {
323 struct vop_lock_args /* {
324 struct vnode *a_vp;
325 int a_flags;
326 } */ *ap = v;
327 struct vnode *vp = ap->a_vp;
328
329 return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
330 }
331
332 /*
333 * Unlock the node.
334 */
335 int
336 genfs_unlock(v)
337 void *v;
338 {
339 struct vop_unlock_args /* {
340 struct vnode *a_vp;
341 int a_flags;
342 } */ *ap = v;
343 struct vnode *vp = ap->a_vp;
344
345 return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
346 &vp->v_interlock));
347 }
348
349 /*
350 * Return whether or not the node is locked.
351 */
352 int
353 genfs_islocked(v)
354 void *v;
355 {
356 struct vop_islocked_args /* {
357 struct vnode *a_vp;
358 } */ *ap = v;
359 struct vnode *vp = ap->a_vp;
360
361 return (lockstatus(&vp->v_lock));
362 }
363
364 /*
365 * Stubs to use when there is no locking to be done on the underlying object.
366 */
367 int
368 genfs_nolock(v)
369 void *v;
370 {
371 struct vop_lock_args /* {
372 struct vnode *a_vp;
373 int a_flags;
374 struct proc *a_p;
375 } */ *ap = v;
376
377 /*
378 * Since we are not using the lock manager, we must clear
379 * the interlock here.
380 */
381 if (ap->a_flags & LK_INTERLOCK)
382 simple_unlock(&ap->a_vp->v_interlock);
383 return (0);
384 }
385
386 int
387 genfs_nounlock(v)
388 void *v;
389 {
390 return (0);
391 }
392
393 int
394 genfs_noislocked(v)
395 void *v;
396 {
397 return (0);
398 }
399
400 /*
401 * Local lease check for NFS servers. Just set up args and let
402 * nqsrv_getlease() do the rest. If NFSSERVER is not in the kernel,
403 * this is a null operation.
404 */
405 int
406 genfs_lease_check(v)
407 void *v;
408 {
409 #ifdef NFSSERVER
410 struct vop_lease_args /* {
411 struct vnode *a_vp;
412 struct proc *a_p;
413 struct ucred *a_cred;
414 int a_flag;
415 } */ *ap = v;
416 u_int32_t duration = 0;
417 int cache;
418 u_quad_t frev;
419
420 (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
421 NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
422 return (0);
423 #else
424 return (0);
425 #endif /* NFSSERVER */
426 }
427
428 int
429 genfs_mmap(v)
430 void *v;
431 {
432 return 0;
433 }
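/*
 * An illustrative sketch, not part of the original file: a filesystem
 * typically wires the stubs above into its vnodeop table.  "somefs" and
 * the particular set of entries below are hypothetical; note that
 * genfs_eopnotsupp_rele() is the stub to use for unsupported ops, such
 * as rename, that must still release the vnodes passed to them.
 */
#ifdef notyet
const struct vnodeopv_entry_desc somefs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_poll_desc, genfs_poll },			/* poll */
	{ &vop_fsync_desc, genfs_fsync },		/* fsync */
	{ &vop_seek_desc, genfs_seek },			/* seek */
	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
	{ &vop_rename_desc, genfs_eopnotsupp_rele },	/* rename (unsupported) */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ NULL, NULL }
};
#endif /* notyet */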
434
435 /*
436 * generic VM getpages routine.
437 * Return PG_BUSY pages for the given range,
438 * reading from backing store if necessary.
439 */
440
441 int
442 genfs_getpages(v)
443 void *v;
444 {
445 struct vop_getpages_args /* {
446 struct vnode *a_vp;
447 voff_t a_offset;
448 struct vm_page **a_m;
449 int *a_count;
450 int a_centeridx;
451 vm_prot_t a_access_type;
452 int a_advice;
453 int a_flags;
454 } */ *ap = v;
455
456 off_t newsize, diskeof, memeof;
457 off_t offset, origoffset, startoffset, endoffset, raoffset;
458 daddr_t lbn, blkno;
459 int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
460 int fs_bshift, fs_bsize, dev_bshift;
461 int flags = ap->a_flags;
462 size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
463 vaddr_t kva;
464 struct buf *bp, *mbp;
465 struct vnode *vp = ap->a_vp;
466 struct vnode *devvp;
467 struct genfs_node *gp = VTOG(vp);
468 struct uvm_object *uobj = &vp->v_uobj;
469 struct vm_page *pg, *pgs[MAX_READ_AHEAD];
470 struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */
471 boolean_t async = (flags & PGO_SYNCIO) == 0;
472 boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
473 boolean_t sawhole = FALSE;
474 boolean_t overwrite = (flags & PGO_OVERWRITE) != 0;
475 UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
476
477 UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
478 vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
479
480 /* XXXUBC temp limit */
481 if (*ap->a_count > MAX_READ_AHEAD) {
482 panic("genfs_getpages: too many pages");
483 }
484
485 error = 0;
486 origoffset = ap->a_offset;
487 orignpages = *ap->a_count;
488 GOP_SIZE(vp, vp->v_size, &diskeof);
489 if (flags & PGO_PASTEOF) {
490 newsize = MAX(vp->v_size,
491 origoffset + (orignpages << PAGE_SHIFT));
492 GOP_SIZE(vp, newsize, &memeof);
493 } else {
494 memeof = diskeof;
495 }
496 KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
497 KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
498 KASSERT(orignpages > 0);
499
500 /*
501 * Bounds-check the request.
502 */
503
504 if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
505 if ((flags & PGO_LOCKED) == 0) {
506 simple_unlock(&uobj->vmobjlock);
507 }
508 UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
509 origoffset, *ap->a_count, memeof,0);
510 return EINVAL;
511 }
512
513 /*
514 * For PGO_LOCKED requests, just return whatever's in memory.
515 */
516
517 if (flags & PGO_LOCKED) {
518 uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
519 UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
520
521 return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
522 }
523
524 /* vnode is VOP_LOCKed, uobj is locked */
525
526 if (write && (vp->v_flag & VONWORKLST) == 0) {
527 vn_syncer_add_to_worklist(vp, filedelay);
528 }
529
530 /*
531 * find the requested pages and make some simple checks.
532 * leave space in the page array for a whole block.
533 */
534
535 if (vp->v_type == VREG) {
536 fs_bshift = vp->v_mount->mnt_fs_bshift;
537 dev_bshift = vp->v_mount->mnt_dev_bshift;
538 } else {
539 fs_bshift = DEV_BSHIFT;
540 dev_bshift = DEV_BSHIFT;
541 }
542 fs_bsize = 1 << fs_bshift;
543
544 orignpages = MIN(orignpages,
545 round_page(memeof - origoffset) >> PAGE_SHIFT);
546 npages = orignpages;
547 startoffset = origoffset & ~(fs_bsize - 1);
548 endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
549 + fs_bsize - 1) & ~(fs_bsize - 1));
550 endoffset = MIN(endoffset, round_page(memeof));
551 ridx = (origoffset - startoffset) >> PAGE_SHIFT;
552
553 memset(pgs, 0, sizeof(pgs));
554 uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
555
556 /*
557 * if the pages are already resident, just return them.
558 */
559
560 for (i = 0; i < npages; i++) {
561 struct vm_page *pg = pgs[ridx + i];
562
563 if ((pg->flags & PG_FAKE) ||
564 (write && (pg->flags & PG_RDONLY))) {
565 break;
566 }
567 }
568 if (i == npages) {
569 UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
570 raoffset = origoffset + (orignpages << PAGE_SHIFT);
571 npages += ridx;
572 goto raout;
573 }
574
575 /*
576 * if PGO_OVERWRITE is set, don't bother reading the pages.
577 */
578
579 if (flags & PGO_OVERWRITE) {
580 UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
581
582 for (i = 0; i < npages; i++) {
583 struct vm_page *pg = pgs[ridx + i];
584
585 pg->flags &= ~(PG_RDONLY|PG_CLEAN);
586 }
587 npages += ridx;
588 goto out;
589 }
590
591 /*
592 * the page wasn't resident and we're not overwriting,
593 * so we're going to have to do some i/o.
594 * find any additional pages needed to cover the expanded range.
595 */
596
597 npages = (endoffset - startoffset) >> PAGE_SHIFT;
598 if (startoffset != origoffset || npages != orignpages) {
599
600 /*
601 * we need to avoid deadlocks caused by locking
602 * additional pages at lower offsets than pages we
603 * already have locked. unlock them all and start over.
604 */
605
606 for (i = 0; i < orignpages; i++) {
607 struct vm_page *pg = pgs[ridx + i];
608
609 if (pg->flags & PG_FAKE) {
610 pg->flags |= PG_RELEASED;
611 }
612 }
613 uvm_page_unbusy(&pgs[ridx], orignpages);
614 memset(pgs, 0, sizeof(pgs));
615
616 UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
617 startoffset, endoffset, 0,0);
618 npgs = npages;
619 uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
620 }
621 simple_unlock(&uobj->vmobjlock);
622
623 /*
624 * read the desired page(s).
625 */
626
627 totalbytes = npages << PAGE_SHIFT;
628 bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
629 tailbytes = totalbytes - bytes;
630 skipbytes = 0;
631
632 kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
633 UVMPAGER_MAPIN_READ);
634
635 s = splbio();
636 mbp = pool_get(&bufpool, PR_WAITOK);
637 splx(s);
638 mbp->b_bufsize = totalbytes;
639 mbp->b_data = (void *)kva;
640 mbp->b_resid = mbp->b_bcount = bytes;
641 mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
642 mbp->b_iodone = (async ? uvm_aio_biodone : 0);
643 mbp->b_vp = vp;
644 LIST_INIT(&mbp->b_dep);
645
646 /*
647 * if EOF is in the middle of the range, zero the part past EOF.
648 * if the page including EOF is not PG_FAKE, skip over it since
649 * in that case it has valid data that we need to preserve.
650 */
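/*
 * For instance (illustrative numbers, assuming 4KB pages): with
 * startoffset 0, totalbytes 16384 and diskeof 5000, bytes is 5000 and
 * tailbytes 11384.  EOF falls in page 1 (offsets 4096-8191); if that
 * page is PG_FAKE it is zeroed from byte 5000 on, otherwise tailstart
 * is rounded up to 8192 so its valid data is preserved and only the
 * two pages wholly past EOF are zeroed.
 */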
651
652 if (tailbytes > 0) {
653 size_t tailstart = bytes;
654
655 if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
656 tailstart = round_page(tailstart);
657 tailbytes -= tailstart - bytes;
658 }
659 UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
660 kva, tailstart, tailbytes,0);
661 memset((void *)(kva + tailstart), 0, tailbytes);
662 }
663
664 /*
665 * now loop over the pages, reading as needed.
666 */
667
668 if (write) {
669 lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
670 } else {
671 lockmgr(&gp->g_glock, LK_SHARED, NULL);
672 }
673
674 bp = NULL;
675 for (offset = startoffset;
676 bytes > 0;
677 offset += iobytes, bytes -= iobytes) {
678
679 /*
680 * skip pages which don't need to be read.
681 */
682
683 pidx = (offset - startoffset) >> PAGE_SHIFT;
684 while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) {
685 size_t b;
686
687 KASSERT((offset & (PAGE_SIZE - 1)) == 0);
688 b = MIN(PAGE_SIZE, bytes);
689 offset += b;
690 bytes -= b;
691 skipbytes += b;
692 pidx++;
693 UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
694 offset, 0,0,0);
695 if (bytes == 0) {
696 goto loopdone;
697 }
698 }
699
700 /*
701 * bmap the file to find out the blkno to read from and
702 * how much we can read in one i/o. if bmap returns an error,
703 * skip the rest of the top-level i/o.
704 */
705
706 lbn = offset >> fs_bshift;
707 error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
708 if (error) {
709 UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
710 lbn, error,0,0);
711 skipbytes += bytes;
712 goto loopdone;
713 }
714
715 /*
716 * see how many pages can be read with this i/o.
717 * reduce the i/o size if necessary to avoid
718 * overwriting pages with valid data.
719 */
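/*
 * For instance (illustrative numbers): with 8KB blocks (fs_bshift 13)
 * and offset 0x5000, lbn is 2; if VOP_BMAP() reports run == 3 the
 * mapping is contiguous out to file offset (2 + 1 + 3) << 13 = 0xc000,
 * so at most 0x7000 bytes can go into this i/o before it is clamped
 * to "bytes" and to the run of PG_FAKE pages below.
 */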
720
721 iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
722 bytes);
723 if (offset + iobytes > round_page(offset)) {
724 pcount = 1;
725 while (pidx + pcount < npages &&
726 pgs[pidx + pcount]->flags & PG_FAKE) {
727 pcount++;
728 }
729 iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
730 (offset - trunc_page(offset)));
731 }
732
733 /*
734 * if this block isn't allocated, zero it instead of reading it.
735 * if this is a read access, mark the pages we zeroed PG_RDONLY.
736 */
737
738 if (blkno < 0) {
739 int holepages = (round_page(offset + iobytes) -
740 trunc_page(offset)) >> PAGE_SHIFT;
741 UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
742
743 sawhole = TRUE;
744 memset((char *)kva + (offset - startoffset), 0,
745 iobytes);
746 skipbytes += iobytes;
747
748 for (i = 0; i < holepages; i++) {
749 if (write) {
750 pgs[pidx + i]->flags &= ~PG_CLEAN;
751 } else {
752 pgs[pidx + i]->flags |= PG_RDONLY;
753 }
754 }
755 continue;
756 }
757
758 /*
759 * allocate a sub-buf for this piece of the i/o
760 * (or just use mbp if there's only 1 piece),
761 * and start it going.
762 */
763
764 if (offset == startoffset && iobytes == bytes) {
765 bp = mbp;
766 } else {
767 s = splbio();
768 bp = pool_get(&bufpool, PR_WAITOK);
769 splx(s);
770 bp->b_data = (char *)kva + offset - startoffset;
771 bp->b_resid = bp->b_bcount = iobytes;
772 bp->b_flags = B_BUSY|B_READ|B_CALL;
773 bp->b_iodone = uvm_aio_biodone1;
774 bp->b_vp = vp;
775 bp->b_proc = NULL;
776 LIST_INIT(&bp->b_dep);
777 }
778 bp->b_lblkno = 0;
779 bp->b_private = mbp;
780 if (devvp->v_type == VBLK) {
781 bp->b_dev = devvp->v_rdev;
782 }
783
784 /* adjust physical blkno for partial blocks */
785 bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
786 dev_bshift);
787
788 UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
789 bp, offset, iobytes, bp->b_blkno);
790
791 VOP_STRATEGY(bp);
792 }
793
794 loopdone:
795 if (skipbytes) {
796 s = splbio();
797 if (error) {
798 mbp->b_flags |= B_ERROR;
799 mbp->b_error = error;
800 }
801 mbp->b_resid -= skipbytes;
802 if (mbp->b_resid == 0) {
803 biodone(mbp);
804 }
805 splx(s);
806 }
807
808 if (async) {
809 UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
810 lockmgr(&gp->g_glock, LK_RELEASE, NULL);
811 return 0;
812 }
813 if (bp != NULL) {
814 error = biowait(mbp);
815 }
816 s = splbio();
817 pool_put(&bufpool, mbp);
818 splx(s);
819 uvm_pagermapout(kva, npages);
820 raoffset = startoffset + totalbytes;
821
822 /*
823 * if we encountered a hole then we have to do a little more work.
824 * for read faults, we marked the page PG_RDONLY so that future
825 * write accesses to the page will fault again.
826 * for write faults, we must make sure that the backing store for
827 * the page is completely allocated while the pages are locked.
828 */
829
830 if (!error && sawhole && write) {
831 for (i = 0; i < npages; i++) {
832 if (pgs[i] == NULL) {
833 continue;
834 }
835 pgs[i]->flags &= ~PG_CLEAN;
836 UVMHIST_LOG(ubchist, "mark dirty pg %p", pgs[i],0,0,0);
837 }
838 error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
839 cred);
840 UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
841 startoffset, npages << PAGE_SHIFT, error,0);
842 }
843 lockmgr(&gp->g_glock, LK_RELEASE, NULL);
844 simple_lock(&uobj->vmobjlock);
845
846 /*
847 * see if we want to start any readahead.
848 * XXXUBC for now, just read the next 128k on 64k boundaries.
849 * this is pretty nonsensical, but it is 50% faster than reading
850 * just the next 64k.
851 */
852
853 raout:
854 if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
855 PAGE_SHIFT <= 16) {
856 off_t rasize;
857 int racount;
858
859 /* XXXUBC temp limit, from above */
860 racount = MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD);
861 rasize = racount << PAGE_SHIFT;
862 (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
863 VM_PROT_READ, 0, 0);
864 simple_lock(&uobj->vmobjlock);
865
866 /* XXXUBC temp limit, from above */
867 racount = MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD);
868 (void) VOP_GETPAGES(vp, raoffset + rasize, NULL, &racount, 0,
869 VM_PROT_READ, 0, 0);
870 simple_lock(&uobj->vmobjlock);
871 }
872
873 /*
874 * we're almost done! release the pages...
875 * for errors, we free the pages.
876 * otherwise we activate them and mark them as valid and clean.
877 * also, unbusy pages that were not actually requested.
878 */
879
880 if (error) {
881 for (i = 0; i < npages; i++) {
882 if (pgs[i] == NULL) {
883 continue;
884 }
885 UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
886 pgs[i], pgs[i]->flags, 0,0);
887 if (pgs[i]->flags & PG_FAKE) {
888 pgs[i]->flags |= PG_RELEASED;
889 }
890 }
891 uvm_lock_pageq();
892 uvm_page_unbusy(pgs, npages);
893 uvm_unlock_pageq();
894 simple_unlock(&uobj->vmobjlock);
895 UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
896 return error;
897 }
898
899 out:
900 UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
901 uvm_lock_pageq();
902 for (i = 0; i < npages; i++) {
903 pg = pgs[i];
904 if (pg == NULL) {
905 continue;
906 }
907 UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
908 pg, pg->flags, 0,0);
909 if (pg->flags & PG_FAKE && !overwrite) {
910 pg->flags &= ~(PG_FAKE);
911 pmap_clear_modify(pgs[i]);
912 }
913 if (write) {
914 pg->flags &= ~(PG_RDONLY);
915 }
916 if (i < ridx || i >= ridx + orignpages || async) {
917 UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
918 pg, pg->offset,0,0);
919 if (pg->flags & PG_WANTED) {
920 wakeup(pg);
921 }
922 if (pg->flags & PG_FAKE) {
923 KASSERT(overwrite);
924 uvm_pagezero(pg);
925 }
926 if (pg->flags & PG_RELEASED) {
927 uvm_pagefree(pg);
928 continue;
929 }
930 uvm_pageactivate(pg);
931 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
932 UVM_PAGE_OWN(pg, NULL);
933 }
934 }
935 uvm_unlock_pageq();
936 simple_unlock(&uobj->vmobjlock);
937 if (ap->a_m != NULL) {
938 memcpy(ap->a_m, &pgs[ridx],
939 orignpages * sizeof(struct vm_page *));
940 }
941 return 0;
942 }
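/*
 * An illustrative sketch, not part of the original file: a synchronous,
 * single-page read through VOP_GETPAGES(), roughly the pattern the
 * read-ahead calls above use.  The helper name is hypothetical; it
 * assumes the usual contract that the caller locks the object and
 * getpages returns with it unlocked.
 */
#ifdef notyet
static int
genfs_example_getpage(struct vnode *vp, voff_t offset, struct vm_page **pgp)
{
	int npages = 1;

	simple_lock(&vp->v_uobj.vmobjlock);
	return VOP_GETPAGES(vp, trunc_page(offset), pgp, &npages, 0,
	    VM_PROT_READ, 0, PGO_SYNCIO);
}
#endif /* notyet */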
943
944 /*
945 * generic VM putpages routine.
946 * Write the given range of pages to backing store.
947 *
948 * => "offhi == 0" means flush all pages at or after "offlo".
949 * => object should be locked by caller. we may _unlock_ the object
950 * if (and only if) we need to clean a page (PGO_CLEANIT), or
951 * if PGO_SYNCIO is set and there are pages busy.
952 * we return with the object unlocked.
953 * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
954 * thus, a caller might want to unlock higher level resources
955 * (e.g. vm_map) before calling flush.
956 * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
957 * unlock the object nor block.
958 * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
959 * => NOTE: we rely on the fact that the object's memq is a TAILQ and
960 * that new pages are inserted on the tail end of the list. thus,
961 * we can make a complete pass through the object in one go by starting
962 * at the head and working towards the tail (new pages are put in
963 * front of us).
964 * => NOTE: we are allowed to lock the page queues, so the caller
965 * must not be holding the page queue lock.
966 *
967 * note on "cleaning" object and PG_BUSY pages:
968 * this routine is holding the lock on the object. the only time
969 * that it can run into a PG_BUSY page that it does not own is if
970 * some other process has started I/O on the page (e.g. either
971 * a pagein, or a pageout). if the PG_BUSY page is being paged
972 * in, then it can not be dirty (!PG_CLEAN) because no one has
973 * had a chance to modify it yet. if the PG_BUSY page is being
974 * paged out then it means that someone else has already started
975 * cleaning the page for us (how nice!). in this case, if we
976 * have syncio specified, then after we make our pass through the
977 * object we need to wait for the other PG_BUSY pages to clear
978 * off (i.e. we need to do an iosync). also note that once a
979 * page is PG_BUSY it must stay in its object until it is un-busyed.
980 *
981 * note on page traversal:
982 * we can traverse the pages in an object either by going down the
983 * linked list in "uobj->memq", or we can go over the address range
984 * by page doing hash table lookups for each address. depending
985 * on how many pages are in the object it may be cheaper to do one
986 * or the other. we set "by_list" to true if we are using memq.
987 * if the cost of a hash lookup was equal to the cost of the list
988 * traversal we could compare the number of pages in the start->stop
989 * range to the total number of pages in the object. however, it
990 * seems that a hash table lookup is more expensive than the linked
991 * list traversal, so we multiply the number of pages in the
992 * range by an estimate of the relatively higher cost of the hash lookup.
993 */
994
995 int
996 genfs_putpages(v)
997 void *v;
998 {
999 struct vop_putpages_args /* {
1000 struct vnode *a_vp;
1001 voff_t a_offlo;
1002 voff_t a_offhi;
1003 int a_flags;
1004 } */ *ap = v;
1005 struct vnode *vp = ap->a_vp;
1006 struct uvm_object *uobj = &vp->v_uobj;
1007 struct simplelock *slock = &uobj->vmobjlock;
1008 off_t startoff = ap->a_offlo;
1009 off_t endoff = ap->a_offhi;
1010 off_t off;
1011 int flags = ap->a_flags;
1012 int n = MAXBSIZE >> PAGE_SHIFT;
1013 int i, s, error, npages, nback;
1014 int freeflag;
1015 struct vm_page *pgs[n], *pg, *nextpg, *tpg, curmp, endmp;
1016 boolean_t wasclean, by_list, needs_clean, yield;
1017 boolean_t async = (flags & PGO_SYNCIO) == 0;
1018 UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
1019
1020 KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1021 KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
1022 KASSERT(startoff < endoff || endoff == 0);
1023
1024 UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
1025 vp, uobj->uo_npages, startoff, endoff - startoff);
1026 if (uobj->uo_npages == 0) {
1027 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
1028 (vp->v_flag & VONWORKLST)) {
1029 vp->v_flag &= ~VONWORKLST;
1030 LIST_REMOVE(vp, v_synclist);
1031 }
1032 simple_unlock(slock);
1033 return 0;
1034 }
1035
1036 /*
1037 * the vnode has pages, set up to process the request.
1038 */
1039
1040 error = 0;
1041 s = splbio();
1042 wasclean = (vp->v_numoutput == 0);
1043 splx(s);
1044 off = startoff;
1045 if (endoff == 0 || flags & PGO_ALLPAGES) {
1046 endoff = trunc_page(LLONG_MAX);
1047 }
1048 by_list = (uobj->uo_npages <=
1049 ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
1050
1051 /*
1052 * start the loop. when scanning by list, hold the last page
1053 * in the list before we start. pages allocated after we start
1054 * will be added to the end of the list, so we can stop at the
1055 * current last page.
1056 */
1057
1058 freeflag = (curproc == uvm.pagedaemon_proc) ? PG_PAGEOUT : PG_RELEASED;
1059 curmp.uobject = uobj;
1060 curmp.offset = (voff_t)-1;
1061 curmp.flags = PG_BUSY;
1062 endmp.uobject = uobj;
1063 endmp.offset = (voff_t)-1;
1064 endmp.flags = PG_BUSY;
1065 if (by_list) {
1066 pg = TAILQ_FIRST(&uobj->memq);
1067 TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
1068 PHOLD(curproc);
1069 } else {
1070 pg = uvm_pagelookup(uobj, off);
1071 }
1072 nextpg = NULL;
1073 while (by_list || off < endoff) {
1074
1075 /*
1076 * if the current page is not interesting, move on to the next.
1077 */
1078
1079 KASSERT(pg == NULL || pg->uobject == uobj);
1080 KASSERT(pg == NULL ||
1081 (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1082 (pg->flags & PG_BUSY) != 0);
1083 if (by_list) {
1084 if (pg == &endmp) {
1085 break;
1086 }
1087 if (pg->offset < startoff || pg->offset >= endoff ||
1088 pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1089 pg = TAILQ_NEXT(pg, listq);
1090 continue;
1091 }
1092 off = pg->offset;
1093 } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1094 off += PAGE_SIZE;
1095 if (off < endoff) {
1096 pg = uvm_pagelookup(uobj, off);
1097 }
1098 continue;
1099 }
1100
1101 /*
1102 * if the current page needs to be cleaned and it's busy,
1103 * wait for it to become unbusy.
1104 */
1105
1106 yield = (curproc->p_cpu->ci_schedstate.spc_flags &
1107 SPCF_SHOULDYIELD) && curproc != uvm.pagedaemon_proc;
1108 if (pg->flags & PG_BUSY || yield) {
1109 KASSERT(curproc != uvm.pagedaemon_proc);
1110 UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
1111 if (by_list) {
1112 TAILQ_INSERT_BEFORE(pg, &curmp, listq);
1113 UVMHIST_LOG(ubchist, "curmp next %p",
1114 TAILQ_NEXT(&curmp, listq), 0,0,0);
1115 }
1116 if (yield) {
1117 simple_unlock(slock);
1118 preempt(NULL);
1119 simple_lock(slock);
1120 } else {
1121 pg->flags |= PG_WANTED;
1122 UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
1123 simple_lock(slock);
1124 }
1125 if (by_list) {
1126 UVMHIST_LOG(ubchist, "after next %p",
1127 TAILQ_NEXT(&curmp, listq), 0,0,0);
1128 pg = TAILQ_NEXT(&curmp, listq);
1129 TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1130 } else {
1131 pg = uvm_pagelookup(uobj, off);
1132 }
1133 continue;
1134 }
1135
1136 /*
1137 * if we're freeing, remove all mappings of the page now.
1138 * if we're cleaning, check if the page needs to be cleaned.
1139 */
1140
1141 if (flags & PGO_FREE) {
1142 pmap_page_protect(pg, VM_PROT_NONE);
1143 }
1144 if (flags & PGO_CLEANIT) {
1145 needs_clean = pmap_clear_modify(pg) ||
1146 (pg->flags & PG_CLEAN) == 0;
1147 pg->flags |= PG_CLEAN;
1148 } else {
1149 needs_clean = FALSE;
1150 }
1151
1152 /*
1153 * if we're cleaning, build a cluster.
1154 * the cluster will consist of pages which are currently dirty,
1155 * but they will be returned to us marked clean.
1156 * if not cleaning, just operate on the one page.
1157 */
1158
1159 if (needs_clean) {
1160 wasclean = FALSE;
1161 memset(pgs, 0, sizeof(pgs));
1162 pg->flags |= PG_BUSY;
1163 UVM_PAGE_OWN(pg, "genfs_putpages");
1164
1165 /*
1166 * first look backward.
1167 */
1168
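/*
 * Use at most half of the cluster for pages below "off", and never
 * look back past the start of the object.
 */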
1169 npages = MIN(n >> 1, off >> PAGE_SHIFT);
1170 nback = npages;
1171 uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
1172 UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
1173 if (nback) {
1174 memmove(&pgs[0], &pgs[npages - nback],
1175 nback * sizeof(pgs[0]));
1176 if (npages - nback < nback)
1177 memset(&pgs[nback], 0,
1178 (npages - nback) * sizeof(pgs[0]));
1179 else
1180 memset(&pgs[npages - nback], 0,
1181 nback * sizeof(pgs[0]));
1182 n -= nback;
1183 }
1184
1185 /*
1186 * then plug in our page of interest.
1187 */
1188
1189 pgs[nback] = pg;
1190
1191 /*
1192 * then look forward to fill in the remaining space in
1193 * the array of pages.
1194 */
1195
1196 npages = MIN(n, (endoff - off) >> PAGE_SHIFT) - 1;
1197 uvn_findpages(uobj, off + PAGE_SIZE, &npages,
1198 &pgs[nback + 1],
1199 UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
1200 npages += nback + 1;
1201 } else {
1202 pgs[0] = pg;
1203 npages = 1;
1204 }
1205
1206 /*
1207 * apply FREE or DEACTIVATE options if requested.
1208 */
1209
1210 if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1211 uvm_lock_pageq();
1212 }
1213 for (i = 0; i < npages; i++) {
1214 tpg = pgs[i];
1215 KASSERT(tpg->uobject == uobj);
1216 if (flags & PGO_DEACTIVATE &&
1217 (tpg->pqflags & PQ_INACTIVE) == 0 &&
1218 tpg->wire_count == 0) {
1219 (void) pmap_clear_reference(tpg);
1220 uvm_pagedeactivate(tpg);
1221 } else if (flags & PGO_FREE) {
1222 pmap_page_protect(tpg, VM_PROT_NONE);
1223 if (tpg->flags & PG_BUSY) {
1224 tpg->flags |= freeflag;
1225 if (freeflag == PG_PAGEOUT) {
1226 uvmexp.paging++;
1227 uvm_pagedequeue(tpg);
1228 }
1229 } else {
1230 nextpg = TAILQ_NEXT(tpg, listq);
1231 uvm_pagefree(tpg);
1232 }
1233 }
1234 }
1235 if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1236 uvm_unlock_pageq();
1237 }
1238 if (needs_clean) {
1239
1240 /*
1241 * start the i/o. if we're traversing by list,
1242 * keep our place in the list with a marker page.
1243 */
1244
1245 if (by_list) {
1246 TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1247 listq);
1248 }
1249 simple_unlock(slock);
1250 error = GOP_WRITE(vp, pgs, npages, flags);
1251 simple_lock(slock);
1252 if (by_list) {
1253 pg = TAILQ_NEXT(&curmp, listq);
1254 TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1255 }
1256 if (error == ENOMEM) {
1257 for (i = 0; i < npages; i++) {
1258 tpg = pgs[i];
1259 if (tpg->flags & PG_PAGEOUT) {
1260 tpg->flags &= ~PG_PAGEOUT;
1261 uvmexp.paging--;
1262 }
1263 tpg->flags &= ~PG_CLEAN;
1264 uvm_pageactivate(tpg);
1265 }
1266 uvm_page_unbusy(pgs, npages);
1267 }
1268 if (error) {
1269 break;
1270 }
1271 if (by_list) {
1272 continue;
1273 }
1274 }
1275
1276 /*
1277 * find the next page and continue if there was no error.
1278 */
1279
1280 if (by_list) {
1281 if (nextpg) {
1282 pg = nextpg;
1283 nextpg = NULL;
1284 } else {
1285 pg = TAILQ_NEXT(pg, listq);
1286 }
1287 } else {
1288 off += npages << PAGE_SHIFT;
1289 if (off < endoff) {
1290 pg = uvm_pagelookup(uobj, off);
1291 }
1292 }
1293 }
1294 if (by_list) {
1295 TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1296 PRELE(curproc);
1297 }
1298
1299 /*
1300 * if we're cleaning and there was nothing to clean,
1301 * take us off the syncer list. if we started any i/o
1302 * and we're doing sync i/o, wait for all writes to finish.
1303 */
1304
1305 if ((flags & PGO_CLEANIT) && wasclean &&
1306 startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
1307 LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
1308 (vp->v_flag & VONWORKLST)) {
1309 vp->v_flag &= ~VONWORKLST;
1310 LIST_REMOVE(vp, v_synclist);
1311 }
1312 if (!wasclean && !async) {
1313 s = splbio();
1314 while (vp->v_numoutput != 0) {
1315 vp->v_flag |= VBWAIT;
1316 UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, FALSE,
1317 "genput2", 0);
1318 simple_lock(slock);
1319 }
1320 splx(s);
1321 }
1322 simple_unlock(&uobj->vmobjlock);
1323 return error;
1324 }
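/*
 * An illustrative sketch, not part of the original file: flushing and
 * freeing every page of a vnode synchronously, as a "flush it all"
 * caller might.  The helper name is hypothetical; the caller locks the
 * object (v_interlock) and putpages returns with it unlocked.
 */
#ifdef notyet
static int
genfs_example_flushall(struct vnode *vp)
{

	simple_lock(&vp->v_interlock);
	return VOP_PUTPAGES(vp, 0, 0,
	    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
}
#endif /* notyet */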
1325
1326 int
1327 genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1328 {
1329 int s, error, run;
1330 int fs_bshift, dev_bshift;
1331 vaddr_t kva;
1332 off_t eof, offset, startoffset;
1333 size_t bytes, iobytes, skipbytes;
1334 daddr_t lbn, blkno;
1335 struct vm_page *pg;
1336 struct buf *mbp, *bp;
1337 struct vnode *devvp;
1338 boolean_t async = (flags & PGO_SYNCIO) == 0;
1339 UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);
1340
1341 UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1342 vp, pgs, npages, flags);
1343
1344 GOP_SIZE(vp, vp->v_size, &eof);
1345 if (vp->v_type == VREG) {
1346 fs_bshift = vp->v_mount->mnt_fs_bshift;
1347 dev_bshift = vp->v_mount->mnt_dev_bshift;
1348 } else {
1349 fs_bshift = DEV_BSHIFT;
1350 dev_bshift = DEV_BSHIFT;
1351 }
1352 error = 0;
1353 pg = pgs[0];
1354 startoffset = pg->offset;
1355 bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
1356 skipbytes = 0;
1357 KASSERT(bytes != 0);
1358
1359 kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
1360 UVMPAGER_MAPIN_WAITOK);
1361
1362 s = splbio();
1363 vp->v_numoutput += 2;
1364 mbp = pool_get(&bufpool, PR_WAITOK);
1365 UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
1366 vp, mbp, vp->v_numoutput, bytes);
1367 splx(s);
1368 mbp->b_bufsize = npages << PAGE_SHIFT;
1369 mbp->b_data = (void *)kva;
1370 mbp->b_resid = mbp->b_bcount = bytes;
1371 mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? (B_CALL|B_ASYNC) : 0);
1372 mbp->b_iodone = uvm_aio_biodone;
1373 mbp->b_vp = vp;
1374 LIST_INIT(&mbp->b_dep);
1375
1376 bp = NULL;
1377 for (offset = startoffset;
1378 bytes > 0;
1379 offset += iobytes, bytes -= iobytes) {
1380 lbn = offset >> fs_bshift;
1381 error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1382 if (error) {
1383 UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
1384 skipbytes += bytes;
1385 bytes = 0;
1386 break;
1387 }
1388
1389 iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
1390 bytes);
1391 if (blkno == (daddr_t)-1) {
1392 skipbytes += iobytes;
1393 continue;
1394 }
1395
1396 /* if it's really one i/o, don't make a second buf */
1397 if (offset == startoffset && iobytes == bytes) {
1398 bp = mbp;
1399 } else {
1400 s = splbio();
1401 vp->v_numoutput++;
1402 bp = pool_get(&bufpool, PR_WAITOK);
1403 UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1404 vp, bp, vp->v_numoutput, 0);
1405 splx(s);
1406 bp->b_data = (char *)kva +
1407 (vaddr_t)(offset - pg->offset);
1408 bp->b_resid = bp->b_bcount = iobytes;
1409 bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
1410 bp->b_iodone = uvm_aio_biodone1;
1411 bp->b_vp = vp;
1412 LIST_INIT(&bp->b_dep);
1413 }
1414 bp->b_lblkno = 0;
1415 bp->b_private = mbp;
1416 if (devvp->v_type == VBLK) {
1417 bp->b_dev = devvp->v_rdev;
1418 }
1419
1420 /* adjust physical blkno for partial blocks */
1421 bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1422 dev_bshift);
1423 UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
1424 vp, offset, bp->b_bcount, bp->b_blkno);
1425 VOP_STRATEGY(bp);
1426 }
1427 if (skipbytes) {
1428 UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
1429 s = splbio();
1430 if (error) {
1431 mbp->b_flags |= B_ERROR;
1432 mbp->b_error = error;
1433 }
1434 mbp->b_resid -= skipbytes;
1435 if (mbp->b_resid == 0) {
1436 biodone(mbp);
1437 }
1438 splx(s);
1439 }
1440 if (async) {
1441 UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
1442 return 0;
1443 }
1444 UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
1445 error = biowait(mbp);
1446 uvm_aio_aiodone(mbp);
1447 UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1448 return error;
1449 }
1450
1451 /*
1452 * VOP_PUTPAGES() for vnodes which never have pages.
1453 */
1454
1455 int
1456 genfs_null_putpages(void *v)
1457 {
1458 struct vop_putpages_args /* {
1459 struct vnode *a_vp;
1460 voff_t a_offlo;
1461 voff_t a_offhi;
1462 int a_flags;
1463 } */ *ap = v;
1464 struct vnode *vp = ap->a_vp;
1465
1466 KASSERT(vp->v_uobj.uo_npages == 0);
1467 simple_unlock(&vp->v_interlock);
1468 return (0);
1469 }
1470
1471 void
1472 genfs_node_init(struct vnode *vp, struct genfs_ops *ops)
1473 {
1474 struct genfs_node *gp = VTOG(vp);
1475
1476 lockinit(&gp->g_glock, PINOD, "glock", 0, 0);
1477 gp->g_op = ops;
1478 }
1479
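/*
 * Generic GOP_SIZE(): round "size" up to a filesystem block boundary.
 */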
1480 void
1481 genfs_size(struct vnode *vp, off_t size, off_t *eobp)
1482 {
1483 int bsize;
1484
1485 bsize = 1 << vp->v_mount->mnt_fs_bshift;
1486 *eobp = (size + bsize - 1) & ~(bsize - 1);
1487 }
1488
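/*
 * VOP_GETPAGES() emulation for filesystems that have not been converted
 * to UBC: any PG_FAKE pages found are filled by calling VOP_READ() on
 * the vnode one page at a time.
 */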
1489 int
1490 genfs_compat_getpages(void *v)
1491 {
1492 struct vop_getpages_args /* {
1493 struct vnode *a_vp;
1494 voff_t a_offset;
1495 struct vm_page **a_m;
1496 int *a_count;
1497 int a_centeridx;
1498 vm_prot_t a_access_type;
1499 int a_advice;
1500 int a_flags;
1501 } */ *ap = v;
1502
1503 off_t origoffset;
1504 struct vnode *vp = ap->a_vp;
1505 struct uvm_object *uobj = &vp->v_uobj;
1506 struct vm_page *pg, **pgs;
1507 vaddr_t kva;
1508 int i, error, orignpages, npages;
1509 struct iovec iov;
1510 struct uio uio;
1511 struct ucred *cred = curproc->p_ucred;
1512 boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1513
1514 error = 0;
1515 origoffset = ap->a_offset;
1516 orignpages = *ap->a_count;
1517 pgs = ap->a_m;
1518
1519 if (write && (vp->v_flag & VONWORKLST) == 0) {
1520 vn_syncer_add_to_worklist(vp, filedelay);
1521 }
1522 if (ap->a_flags & PGO_LOCKED) {
1523 uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1524 UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
1525
1526 return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
1527 }
1528 if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
1529 simple_unlock(&uobj->vmobjlock);
1530 return EINVAL;
1531 }
1532 npages = orignpages;
1533 uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
1534 simple_unlock(&uobj->vmobjlock);
1535 kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
1536 UVMPAGER_MAPIN_READ);
1537 for (i = 0; i < npages; i++) {
1538 pg = pgs[i];
1539 if ((pg->flags & PG_FAKE) == 0) {
1540 continue;
1541 }
1542 iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
1543 iov.iov_len = PAGE_SIZE;
1544 uio.uio_iov = &iov;
1545 uio.uio_iovcnt = 1;
1546 uio.uio_offset = origoffset + (i << PAGE_SHIFT);
1547 uio.uio_segflg = UIO_SYSSPACE;
1548 uio.uio_rw = UIO_READ;
1549 uio.uio_resid = PAGE_SIZE;
1550 uio.uio_procp = curproc;
1551 error = VOP_READ(vp, &uio, 0, cred);
1552 if (error) {
1553 break;
1554 }
1555 }
1556 uvm_pagermapout(kva, npages);
1557 simple_lock(&uobj->vmobjlock);
1558 uvm_lock_pageq();
1559 for (i = 0; i < npages; i++) {
1560 pg = pgs[i];
1561 if (error && (pg->flags & PG_FAKE) != 0) {
1562 pg->flags |= PG_RELEASED;
1563 } else {
1564 pmap_clear_modify(pg);
1565 uvm_pageactivate(pg);
1566 }
1567 }
1568 if (error) {
1569 uvm_page_unbusy(pgs, npages);
1570 }
1571 uvm_unlock_pageq();
1572 simple_unlock(&uobj->vmobjlock);
1573 return error;
1574 }
1575
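/*
 * Companion GOP_WRITE() for the compat case above: push the pages out
 * with VOP_WRITE() and then synthesize a buf so that uvm_aio_aiodone()
 * can finish the pageout bookkeeping.
 */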
1576 int
1577 genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
1578 int flags)
1579 {
1580 off_t offset;
1581 struct iovec iov;
1582 struct uio uio;
1583 struct ucred *cred = curproc->p_ucred;
1584 struct buf *bp;
1585 vaddr_t kva;
1586 int s, error;
1587
1588 offset = pgs[0]->offset;
1589 kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
1590 UVMPAGER_MAPIN_WAITOK);
1591
1592 iov.iov_base = (void *)kva;
1593 iov.iov_len = npages << PAGE_SHIFT;
1594 uio.uio_iov = &iov;
1595 uio.uio_iovcnt = npages;
1596 uio.uio_offset = offset;
1597 uio.uio_segflg = UIO_SYSSPACE;
1598 uio.uio_rw = UIO_WRITE;
1599 uio.uio_resid = npages << PAGE_SHIFT;
1600 uio.uio_procp = curproc;
1601 error = VOP_WRITE(vp, &uio, 0, cred);
1602
1603 s = splbio();
1604 vp->v_numoutput++;
1605 bp = pool_get(&bufpool, PR_WAITOK);
1606 splx(s);
1607
1608 bp->b_flags = B_BUSY | B_WRITE | B_AGE;
1609 bp->b_vp = vp;
1610 bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
1611 bp->b_data = (char *)kva;
1612 bp->b_bcount = npages << PAGE_SHIFT;
1613 bp->b_bufsize = npages << PAGE_SHIFT;
1614 bp->b_resid = 0;
1615 LIST_INIT(&bp->b_dep);
1616 if (error) {
1617 bp->b_flags |= B_ERROR;
1618 bp->b_error = error;
1619 }
1620 uvm_aio_aiodone(bp);
1621 return error;
1622 }
1623