/*	$NetBSD: nfs_bio.c,v 1.34 1997/10/10 01:53:18 fvdl Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>

#include <vm/vm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred, cflag)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag, cflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL;
	daddr_t lbn, bn, rabn;
	caddr_t baddr, ep, edp;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp;
	off_t curoff = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
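	/*
	 * Cache blocks are sized by nm_rsize on the read and write paths
	 * alike, so that all buffers for a file are the same size; see
	 * the corresponding note in nfs_write().
	 */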
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldircache(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR)
					nfs_invaldircache(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease. If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, ND_READ)) {
				do {
					error = nqnfs_getlease(vp, ND_READ, cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR)
						nfs_invaldircache(vp);
					error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
					if (error)
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
				nfs_invaldircache(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		}
		/*
		 * Don't cache symlinks.
		 */
		if (np->n_flag & NQNFSNONCACHE
		    || ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
			switch (vp->v_type) {
			case VREG:
				return (nfs_readrpc(vp, uio, cred));
			case VLNK:
				return (nfs_readlinkrpc(vp, uio, cred));
			case VDIR:
				break;
			default:
				printf(" NQNFSNONCACHE: type %x unexpected\n",
				    vp->v_type);
			}
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
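			/*
			 * lbn is the logical block of the request, on the
			 * offset within that block, and bn the corresponding
			 * buffer cache block address in DEV_BSIZE units.
			 */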
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, cred)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, cred, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
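			/*
			 * Clamp the copy count to what remains of the block
			 * and of the file; diff goes to zero or negative at
			 * or past EOF, which leaves n <= 0 and ends the loop.
			 */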
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			vp->v_lastr = lbn;
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, cred, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
diragain:
			nfsstats.biocache_readdirs++;
			if (uio->uio_offset != 0 &&
			    uio->uio_offset == np->n_direofoffset)
				return (0);
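			/*
			 * Map the directory offset cookie to the cache block
			 * and entry number it corresponds to.
			 */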
			ndp = nfs_lookdircache(vp, uio->uio_offset, 0, 0, 1);
#ifdef DIAGNOSTIC
			if (!ndp)
				panic("nfs_bioread: bad dir cache");
#endif
			bp = nfs_getcacheblk(vp, ndp->dc_blkno, NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				bp->b_dcookie = ndp->dc_cookie;
				error = nfs_doio(bp, cred, p);
				if (error) {
					/*
					 * Yuck! The directory has been modified on the
					 * server. Punt and let the userland code
					 * deal with it.
					 */
					brelse(bp);
					if (error == NFSERR_BAD_COOKIE) {
						nfs_invaldircache(vp);
						nfs_vinvalbuf(vp, 0, cred, p, 1);
						error = EINVAL;
					}
					return (error);
				}
			}

			/*
			 * Find the entry we were looking for in the block.
			 */

			en = ndp->dc_entry;

			pdp = dp = (struct dirent *)bp->b_data;
			edp = bp->b_data + bp->b_validend;
			enn = 0;
			while (enn < en && (caddr_t)dp < edp) {
				pdp = dp;
				dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			/*
			 * If the entry number was bigger than the number of
			 * entries in the block, or the cookie of the previous
			 * entry doesn't match, the directory cache is
			 * stale. Flush it and try again (i.e. go to
			 * the server).
			 */
			if ((caddr_t)dp >= edp || (caddr_t)dp + dp->d_reclen > edp ||
			    (en > 0 && NFS_GETCOOKIE(pdp) != uio->uio_offset)) {
#ifdef DEBUG
				printf("invalid cache: %p %p %p len %u off %lx %lx\n",
				    pdp, dp, edp, dp->d_reclen,
				    (unsigned long)uio->uio_offset,
				    (unsigned long)NFS_GETCOOKIE(pdp));
#endif
				brelse(bp);
				nfs_invaldircache(vp);
				nfs_vinvalbuf(vp, 0, cred, p, 0);
				goto diragain;
			}

			on = (caddr_t)dp - bp->b_data;

			/*
			 * Cache all entries that may be exported to the
			 * user, as they may be thrown back at us. The
			 * NFSBIO_CACHECOOKIES flag indicates that all
			 * entries are being 'exported', so cache them all.
			 */

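			/*
			 * At the first entry there is no previous one, so
			 * step dp forward; the loop below then records each
			 * cookie with the entry that it leads to.
			 */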
			if (en == 0 && pdp == dp) {
				dp = (struct dirent *)
				    ((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			if (uio->uio_resid < (bp->b_validend - on)) {
				n = uio->uio_resid;
				enough = 1;
			} else
				n = bp->b_validend - on;

			ep = bp->b_data + on + n;

			/*
			 * Find last complete entry to copy, caching entries
			 * (if requested) as we go.
			 */

			while ((caddr_t)dp < ep && (caddr_t)dp + dp->d_reclen <= ep) {
				if (cflag & NFSBIO_CACHECOOKIES)
					nfs_lookdircache(vp, NFS_GETCOOKIE(pdp), enn,
					    bp->b_lblkno, 1);
				pdp = dp;
				dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			/*
			 * If the last requested entry was not the last in the
			 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
			 * cache the cookie of the last requested one, and
			 * set the offset to it.
			 */

			if ((on + n) < bp->b_validend) {
				curoff = NFS_GETCOOKIE(pdp);
				nfs_lookdircache(vp, curoff, enn, bp->b_lblkno, 1);
			} else
				curoff = bp->b_dcookie;

			n = ((caddr_t)pdp + pdp->d_reclen) - (bp->b_data + on);

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have the
			 * directory offset cookie of the next block.)
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    np->n_direofoffset == 0 && !(np->n_flag & NQNFSNONCACHE)) {
				ndp = nfs_lookdircache(vp, bp->b_dcookie, 0, 0, 1);
				rabp = nfs_getcacheblk(vp, ndp->dc_blkno,
				    NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
						rabp->b_dcookie = ndp->dc_cookie;
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp, cred)) {
							rabp->b_flags |= B_INVAL;
							brelse(rabp);
						}
					} else
						brelse(rabp);
				}
			}
			got_buf = 1;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			if (np->n_flag & NQNFSNONCACHE)
				bp->b_flags |= B_INVAL;
			uio->uio_offset = curoff;
			if (enough)
				n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0, iomode, must_commit;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		(void)vnode_pager_uncache(vp);

		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
			iomode = NFSV3WRITE_FILESYNC;
			error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
			if (must_commit)
				nfs_clearcommit(vp->v_mount);
			return (error);
		}
		nfsstats.biocache_writes++;
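		/*
		 * As on the read side: lbn is the logical block, on the
		 * offset within it, n the byte count for this pass, and bn
		 * the buffer cache block address in DEV_BSIZE units.
		 */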
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
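		/*
		 * Extend the valid range to cover the new dirty region. If
		 * the old valid range is disjoint from the dirty one, the
		 * gap between them would contain undefined data, so the
		 * valid range is reset to just the dirty region.
		 */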
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
			if (np->n_flag & NQNFSNONCACHE) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		} else if ((n + on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
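		/*
		 * On interruptible mounts, sleep with PCATCH so a signal
		 * wakes us up; getblk() then returns NULL, and we bail out
		 * if the signal is fatal, else retry with a 2 second
		 * timeout.
		 */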
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;
	register struct nfsmount *nmp;
	int gotiod, slpflag = 0, slptimeo = 0, error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */

	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}
	/*
	 * If none are free, we may already have an iod working on this mount
	 * point. If so, it will process our request.
	 */
	if (!gotiod && nmp->nm_bufqiods > 0)
		gotiod = TRUE;

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
			    "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0)
				goto again;
		}

		if (bp->b_flags & B_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
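			/*
			 * If this vnode backs a running executable and the
			 * file has changed on the server, the in-core text
			 * image is now inconsistent, so the process is
			 * killed.
			 */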
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			    NQNFS_CKINVALID(vp, np, ND_READ) &&
			    np->n_lrev != np->n_brev) ||
			    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			    np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = bp->b_dcookie;
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop, cr);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop, cr);
			if (!error) {
				bp->b_dcookie = uiop->uio_offset;
				bp->b_validoff = 0;
				bp->b_validend = bp->b_bcount - uiop->uio_resid;
			}
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
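		/*
		 * Async writes may go out unstable and be committed later
		 * (see B_NEEDCOMMIT below); buffers already marked for
		 * commit or marked nocache, and all sync writes, go out
		 * FILESYNC.
		 */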
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
#ifdef fvdl_debug
		printf("nfs_doio(%x): bp %x doff %d dend %d\n",
		    vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
#endif
		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
		if (!error && iomode == NFSV3WRITE_UNSTABLE)
			bp->b_flags |= B_NEEDCOMMIT;
		else
			bp->b_flags &= ~B_NEEDCOMMIT;
		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
			 * buffer to the clean list, we have to reassign it back to the
			 * dirty one. Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else if (error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}