/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 *	$Id: nfs_bio.c,v 1.13 1994/06/15 19:59:52 mycroft Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

struct buf *incore(), *nfs_getcacheblk();
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf, nra, error = 0, n, on, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date.  If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease.  If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
				do {
					error = nqnfs_getlease(vp, NQL_READ, cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						np->n_direofoffset = 0;
						cache_purge(vp);
					}
					if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
				np->n_direofoffset = 0;
				cache_purge(vp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
		}
		if (np->n_flag & NQNFSNONCACHE) {
			switch (vp->v_type) {
			case VREG:
				error = nfs_readrpc(vp, uio, cred);
				break;
			case VLNK:
				error = nfs_readlinkrpc(vp, uio, cred);
				break;
			case VDIR:
				error = nfs_readdirrpc(vp, uio, cred);
				break;
			}
			return (error);
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
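			/*
			 * Map the byte offset into the cache geometry: lbn is
			 * the logical biosize block, on the offset within it,
			 * and bn the DEV_BSIZE-scaled block number getblk()
			 * expects.  Illustrative numbers (not from the
			 * original source): with biosize 8192 and uio_offset
			 * 20000, lbn = 2, on = 3616 and, with a 512 byte
			 * DEV_BSIZE, bn = 32.
			 */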
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    lbn == vp->v_lastr + 1) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, cred)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						}
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
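			/*
			 * Note: a block that is busy because a write rpc is in
			 * progress still holds valid data, so it is copied
			 * from in place; got_buf stays 0 so it is not
			 * brelse'd below.
			 */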
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					if (error = nfs_doio(bp, cred, p)) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
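			/*
			 * If the buffer came from the cache without a fresh
			 * read and the bytes wanted fall outside its valid
			 * range, toss it (pushing any delayed write first)
			 * and loop back to read the whole block.
			 */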
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAL;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			vp->v_lastr = lbn;
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
			nfsstats.biocache_readdirs++;
			bn = (daddr_t)uio->uio_offset;
			bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have
			 * the directory offset cookie of the next block.)
			 */
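			/*
			 * nfs_doio() left the cookie of the following block in
			 * b_blkno; skip the read ahead when it matches the
			 * known directory eof cookie.
			 */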
			rabn = bp->b_blkno;
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    rabn != 0 && rabn != np->n_direofoffset &&
			    !incore(vp, rabn)) {
				rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp, cred)) {
							rabp->b_flags |= B_INVAL;
							brelse(rabp);
						}
					}
				}
			}
			on = 0;
			n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
			got_buf = 1;
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			if (n + on == biosize || uio->uio_offset == np->n_size)
				bp->b_flags |= B_AGE;
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			uio->uio_offset = bp->b_blkno;
			break;
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem.  nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
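		/*
		 * As in nfs_bioread(), lbn is the logical biosize block, on
		 * the offset within it, n the byte count to copy into it,
		 * and bn the DEV_BSIZE-scaled block number.
		 */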
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
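		/*
		 * For example, if [0, 512) is already dirty and the new
		 * write covers [1024, 1536), the bytes in between may not
		 * be valid cached data, so the old dirty region must go to
		 * the server before the new one is recorded.
		 */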
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
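		/*
		 * Extend the valid region to cover the newly written bytes.
		 * If the old valid region is disjoint from the dirty region,
		 * it is discarded rather than leaving an undefined gap
		 * between the two.
		 */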
#ifndef notdef
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC, do bwrite().
		 */
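		/*
		 * Otherwise a completely filled block is pushed with an
		 * async write, while a partial block is left as a delayed
		 * write in the hope that the rest of it arrives soon.
		 */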
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			if (error = VOP_BWRITE(bp))
				return (error);
		} else if ((n + on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy.  If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

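	/*
	 * On an interruptible mount the first getblk() sleeps with PCATCH;
	 * if a signal aborts it, check whether the signal should interrupt
	 * the operation and otherwise retry with a bounded two second
	 * sleep, so a hung server can still be escaped.
	 */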
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers.  If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
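	/*
	 * The first pass may be interrupted by a signal on an interruptible
	 * mount; later passes poll with a bounded timeout until the flush
	 * succeeds or a fatal signal is caught, in which case NFLUSHINPROG
	 * is cleared and any waiter is woken.
	 */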
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O.  Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;

	if (nfs_numasync == 0)
		return (EIO);
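	/*
	 * Scan for an idle nfsiod.  The buffer is queued for the first one
	 * found, after assigning a credential matching the direction of the
	 * i/o so the daemon can do the rpc on the caller's behalf.
	 */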
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if (bp->b_flags & B_READ) {
				if (bp->b_rcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_rcred = cred;
				}
			} else {
				if (bp->b_wcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_wcred = cred;
				}
			}

			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block.  This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error, diff, len;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
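
	/*
	 * Build a kernel-space uio describing the buffer, so the nfs rpc
	 * routines can treat cache i/o like any other transfer.
	 */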
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS)
		panic("doio phys");
	if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
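					/*
					 * diff is the number of bytes the rpc
					 * actually returned; len is how far
					 * the file extends beyond them,
					 * clipped to this buffer.
					 */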
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (bp->b_blkno * DEV_BSIZE
						+ diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			    np->n_lrev != np->n_brev) ||
			    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			    np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = 0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			uiop->uio_offset = bp->b_lblkno;
			nfsstats.readdir_bios++;
			if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
				error = nfs_readdirlookrpc(vp, uiop, cr);
			else
				error = nfs_readdirrpc(vp, uiop, cr);
			/*
			 * Save offset cookie in b_blkno.
			 */
			bp->b_blkno = uiop->uio_offset;
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
			- bp->b_dirtyoff;
		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
			+ bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if (bp->b_flags & B_APPENDWRITE)
			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
		else
			error = nfs_writerpc(vp, uiop, cr, 0);
		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR.  For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 */
		if (error == EINTR) {
			bp->b_flags &= ~B_INVAL;
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
			 * buffer to the clean list, we have to reassign it back to the
			 * dirty one.  Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}