/*	$NetBSD: nfs_bio.c,v 1.23 1996/02/09 21:48:19 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/signalvar.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>

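/*
 * nfs_iodwant[] holds the nfsiod processes currently idle and waiting
 * for work; nfs_numasync is the number of async daemons running.
 * nfs_asyncio() below scans the former and fails fast when the latter
 * is zero.
 */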
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			if ((error = VOP_GETATTR(vp, &vattr, cred, p)) != 0)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				np->n_direofoffset = 0;
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease. If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
				do {
					error = nqnfs_getlease(vp, NQL_READ, cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						np->n_direofoffset = 0;
						cache_purge(vp);
					}
					error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
					if (error)
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
				np->n_direofoffset = 0;
				cache_purge(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		}
		if (np->n_flag & NQNFSNONCACHE) {
			switch (vp->v_type) {
			case VREG:
				error = nfs_readrpc(vp, uio, cred);
				break;
			case VLNK:
				error = nfs_readlinkrpc(vp, uio, cred);
				break;
			case VDIR:
				error = nfs_readdirrpc(vp, uio, cred);
				break;
			case VCHR:
			case VSOCK:
			case VFIFO:
			case VBAD:
			case VNON:
			case VBLK:
				break;
			}
			return (error);
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
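			/*
			 * Split the file offset into a logical block number
			 * (lbn), the offset within that block (on; the mask
			 * assumes biosize is a power of two), and the
			 * corresponding buffer cache block number (bn) in
			 * DEV_BSIZE units.
			 */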
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize-1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    lbn == vp->v_lastr + 1) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, cred)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, cred, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
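			/*
			 * Trim the transfer so it neither crosses the block
			 * boundary nor runs past the file size cached in
			 * np->n_size.
			 */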
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAL;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
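			/*
			 * Record the last logical block read; the sequential
			 * read-ahead test above keys off v_lastr + 1.
			 */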
			vp->v_lastr = lbn;
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if ((error = nfs_doio(bp, cred, p)) != 0) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
			if (uio->uio_resid < NFS_DIRBLKSIZ)
				return (0);
			nfsstats.biocache_readdirs++;
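			/*
			 * For directories, uio_offset is not a byte offset
			 * but the opaque cookie of the block to read, as
			 * saved in b_blkno by nfs_doio(), so it can be used
			 * directly as the block number.
			 */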
			bn = (daddr_t)uio->uio_offset;
			bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if ((error = nfs_doio(bp, cred, p)) != 0) {
					brelse(bp);
					return (error);
				}
			}

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have the
			 * directory offset cookie of the next block.)
			 */
			rabn = bp->b_blkno;
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    rabn != 0 && rabn != np->n_direofoffset &&
			    !incore(vp, rabn)) {
				rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp, cred)) {
							rabp->b_flags |= B_INVAL;
							brelse(rabp);
						}
					} else
						brelse(rabp);
				}
			}
			on = 0;
			n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
			got_buf = 1;
			break;
		case VBAD:
		case VSOCK:
		case VCHR:
		case VBLK:
		case VNON:
		case VFIFO:
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VLNK:
			n = 0;
			break;
		case VDIR:
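			/*
			 * Advance the caller's offset to the cookie of the
			 * next directory block, saved in b_blkno by
			 * nfs_doio().
			 */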
			uio->uio_offset = bp->b_blkno;
			break;
		case VREG:
		case VBAD:
		case VFIFO:
		case VSOCK:
		case VCHR:
		case VBLK:
		case VNON:
			break;
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		(void)vnode_pager_uncache(vp);

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
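		/*
		 * Save the write credentials on the buffer, so that a later
		 * delayed or asynchronous write-back is performed with
		 * credentials that can write the file.
		 */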
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
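		/*
		 * Keep the valid region consistent with the new dirty bytes:
		 * if the old valid region does not reach the dirty range it
		 * is discarded rather than merged, since the gap between
		 * the two was never read in.
		 */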
#ifndef notdef
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			if ((error = VOP_BWRITE(bp)) != 0)
				return (error);
		} else if ((n + on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
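		/*
		 * A null return means the PCATCH sleep was interrupted:
		 * give up if a signal is pending for this mount, otherwise
		 * retry, now sleeping at most two seconds at a time.
		 */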
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
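	/*
	 * vinvalbuf() is expected to fail here only when its sleep was
	 * interrupted; either bail out with EINTR (waking any waiting
	 * flusher) or retry without PCATCH, using the timeout instead.
	 */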
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;

	if (nfs_numasync == 0)
		return (EIO);
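	/*
	 * Find an idle nfsiod: give the buffer the proper credentials,
	 * queue it, and wake the daemon.
	 */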
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if (bp->b_flags & B_READ) {
				if (bp->b_rcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_rcred = cred;
				}
			} else {
				if (bp->b_wcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_wcred = cred;
				}
			}

			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr, 0);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (bp->b_blkno * DEV_BSIZE
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			    NQNFS_CKINVALID(vp, np, NQL_READ) &&
			    np->n_lrev != np->n_brev) ||
			    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			    np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = 0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			uiop->uio_offset = bp->b_lblkno;
			nfsstats.readdir_bios++;
			if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
				error = nfs_readdirlookrpc(vp, uiop, cr);
			else
				error = nfs_readdirrpc(vp, uiop, cr);
			/*
			 * Save offset cookie in b_blkno.
			 */
			bp->b_blkno = uiop->uio_offset;
			break;
		case VNON:
		case VBLK:
		case VCHR:
		case VFIFO:
		case VBAD:
		case VSOCK:
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
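		/*
		 * Write case: push only the dirty subrange of the block to
		 * the server; nfs_writerpc() sizes the rpc's with nm_wsize.
		 */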
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if (bp->b_flags & B_APPENDWRITE)
			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
		else
			error = nfs_writerpc(vp, uiop, cr, 0);
		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 */
		if (error == EINTR) {
			bp->b_flags &= ~B_INVAL;
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
			 * buffer to the clean list, we have to reassign it back to the
			 * dirty one. Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}