nfs_clntsubs.c revision 1.1.2.2 1 /* $NetBSD: nfs_clntsubs.c,v 1.1.2.2 2010/03/11 15:04:31 yamt Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
35 */
36
37 /*
38 * Copyright 2000 Wasabi Systems, Inc.
39 * All rights reserved.
40 *
41 * Written by Frank van der Linden for Wasabi Systems, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed for the NetBSD Project by
54 * Wasabi Systems, Inc.
55 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
56 * or promote products derived from this software without specific prior
57 * written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsubs.c,v 1.1.2.2 2010/03/11 15:04:31 yamt Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_nfs.h"
77 #endif
78
79 /*
80 * These functions support the macros and help fiddle mbuf chains for
81 * the nfs op functions. They do things like create the rpc header and
82 * copy data between mbuf chains and uio lists.
83 */
84 #include <sys/param.h>
85 #include <sys/proc.h>
86 #include <sys/systm.h>
87 #include <sys/kernel.h>
88 #include <sys/kmem.h>
89 #include <sys/mount.h>
90 #include <sys/vnode.h>
91 #include <sys/namei.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/stat.h>
95 #include <sys/filedesc.h>
96 #include <sys/time.h>
97 #include <sys/dirent.h>
98 #include <sys/once.h>
99 #include <sys/kauth.h>
100 #include <sys/atomic.h>
101
102 #include <uvm/uvm_extern.h>
103
104 #include <nfs/rpcv2.h>
105 #include <nfs/nfsproto.h>
106 #include <nfs/nfsnode.h>
107 #include <nfs/nfs.h>
108 #include <nfs/xdr_subs.h>
109 #include <nfs/nfsm_subs.h>
110 #include <nfs/nfsmount.h>
111 #include <nfs/nfsrtt.h>
112 #include <nfs/nfs_var.h>
113
114 #include <miscfs/specfs/specdev.h>
115
116 #include <netinet/in.h>
117
/*
 * Attribute cache routines.
 * nfsm_loadattrcache() - dissects the attributes that are on the mbuf list
 *	and hands them to nfs_loadattrcache()
 * nfs_loadattrcache() - loads or updates the cache contents from a
 *	struct nfs_fattr
 * nfs_getattrcache() - returns valid attributes if found in cache, returns
 *	error otherwise
 */
125
126 /*
127 * Load the attribute cache (that lives in the nfsnode entry) with
128 * the values on the mbuf list and
129 * Iff vap not NULL
130 * copy the attributes to *vaper
131 */
132 int
133 nfsm_loadattrcache(struct vnode **vpp, struct mbuf **mdp, char **dposp, struct vattr *vaper, int flags)
134 {
135 int32_t t1;
136 char *cp2;
137 int error = 0;
138 struct mbuf *md;
139 int v3 = NFS_ISV3(*vpp);
140
141 md = *mdp;
142 t1 = (mtod(md, char *) + md->m_len) - *dposp;
143 error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2);
144 if (error)
145 return (error);
146 return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags);
147 }
148
149 int
150 nfs_loadattrcache(struct vnode **vpp, struct nfs_fattr *fp, struct vattr *vaper, int flags)
151 {
152 struct vnode *vp = *vpp;
153 struct vattr *vap;
154 int v3 = NFS_ISV3(vp);
155 enum vtype vtyp;
156 u_short vmode;
157 struct timespec mtime;
158 struct timespec ctime;
159 int32_t rdev;
160 struct nfsnode *np;
161 extern int (**spec_nfsv2nodeop_p)(void *);
162 uid_t uid;
163 gid_t gid;
164
165 if (v3) {
166 vtyp = nfsv3tov_type(fp->fa_type);
167 vmode = fxdr_unsigned(u_short, fp->fa_mode);
168 rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1),
169 fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2));
170 fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
171 fxdr_nfsv3time(&fp->fa3_ctime, &ctime);
172 } else {
173 vtyp = nfsv2tov_type(fp->fa_type);
174 vmode = fxdr_unsigned(u_short, fp->fa_mode);
175 if (vtyp == VNON || vtyp == VREG)
176 vtyp = IFTOVT(vmode);
177 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
178 fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
179 ctime.tv_sec = fxdr_unsigned(u_int32_t,
180 fp->fa2_ctime.nfsv2_sec);
181 ctime.tv_nsec = 0;
182
183 /*
184 * Really ugly NFSv2 kludge.
185 */
186 if (vtyp == VCHR && rdev == 0xffffffff)
187 vtyp = VFIFO;
188 }
189
190 vmode &= ALLPERMS;
191
192 /*
193 * If v_type == VNON it is a new node, so fill in the v_type,
194 * n_mtime fields. Check to see if it represents a special
195 * device, and if so, check for a possible alias. Once the
196 * correct vnode has been obtained, fill in the rest of the
197 * information.
198 */
199 np = VTONFS(vp);
200 if (vp->v_type == VNON) {
201 vp->v_type = vtyp;
202 if (vp->v_type == VFIFO) {
203 extern int (**fifo_nfsv2nodeop_p)(void *);
204 vp->v_op = fifo_nfsv2nodeop_p;
205 } else if (vp->v_type == VREG) {
206 mutex_init(&np->n_commitlock, MUTEX_DEFAULT, IPL_NONE);
207 } else if (vp->v_type == VCHR || vp->v_type == VBLK) {
208 vp->v_op = spec_nfsv2nodeop_p;
209 spec_node_init(vp, (dev_t)rdev);
210 }
211 np->n_mtime = mtime;
212 }
213 uid = fxdr_unsigned(uid_t, fp->fa_uid);
214 gid = fxdr_unsigned(gid_t, fp->fa_gid);
215 vap = np->n_vattr;
216
217 mutex_enter(&np->n_attrlock);
218 /*
219 * Invalidate access cache if uid, gid, mode or ctime changed.
220 */
221 if (np->n_accstamp != -1 &&
222 (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode
223 || timespeccmp(&ctime, &vap->va_ctime, !=)))
224 np->n_accstamp = -1;
225
226 vap->va_type = vtyp;
227 vap->va_mode = vmode;
228 vap->va_rdev = (dev_t)rdev;
229 vap->va_mtime = mtime;
230 vap->va_ctime = ctime;
231 vap->va_birthtime.tv_sec = VNOVAL;
232 vap->va_birthtime.tv_nsec = VNOVAL;
233 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
234 switch (vtyp) {
235 case VDIR:
236 vap->va_blocksize = NFS_DIRFRAGSIZ;
237 break;
238 case VBLK:
239 vap->va_blocksize = BLKDEV_IOSIZE;
240 break;
241 case VCHR:
242 vap->va_blocksize = MAXBSIZE;
243 break;
244 default:
245 vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize :
246 fxdr_unsigned(int32_t, fp->fa2_blocksize);
247 break;
248 }
249 if (v3) {
250 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
251 vap->va_uid = uid;
252 vap->va_gid = gid;
253 vap->va_size = fxdr_hyper(&fp->fa3_size);
254 vap->va_bytes = fxdr_hyper(&fp->fa3_used);
255 vap->va_fileid = fxdr_hyper(&fp->fa3_fileid);
256 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
257 vap->va_flags = 0;
258 vap->va_filerev = 0;
259 } else {
260 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
261 vap->va_uid = uid;
262 vap->va_gid = gid;
263 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
264 vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks)
265 * NFS_FABLKSIZE;
266 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
267 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
268 vap->va_flags = 0;
269 vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
270 vap->va_filerev = 0;
271 }
272 if (vap->va_size > VFSTONFS(vp->v_mount)->nm_maxfilesize) {
273 mutex_exit(&np->n_attrlock);
274 return EFBIG;
275 }
276 if (vap->va_size != np->n_size) {
277 if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
278 vap->va_size = np->n_size;
279 } else {
280 np->n_size = vap->va_size;
281 if (vap->va_type == VREG) {
282 /*
283 * we can't free pages if NAC_NOTRUNC because
284 * the pages can be owned by ourselves.
285 */
286 if (flags & NAC_NOTRUNC) {
287 np->n_flag |= NTRUNCDELAYED;
288 } else {
289 mutex_exit(&np->n_attrlock); /* XXX */
290 genfs_node_wrlock(vp);
291 mutex_enter(&vp->v_interlock);
292 (void)VOP_PUTPAGES(vp, 0,
293 0, PGO_SYNCIO | PGO_CLEANIT |
294 PGO_FREE | PGO_ALLPAGES);
295 uvm_vnp_setsize(vp, np->n_size);
296 genfs_node_unlock(vp);
297 mutex_enter(&np->n_attrlock);
298 }
299 }
300 }
301 }
302 np->n_attrstamp = time_second;
303 if (vaper != NULL) {
304 memcpy((void *)vaper, (void *)vap, sizeof(*vap));
305 if (np->n_flag & NCHG) {
306 if (np->n_flag & NACC)
307 vaper->va_atime = np->n_atim;
308 if (np->n_flag & NUPD)
309 vaper->va_mtime = np->n_mtim;
310 }
311 }
312 mutex_exit(&np->n_attrlock);
313 return (0);
314 }
315
316 /*
317 * Check the time stamp
318 * If the cache is valid, copy contents to *vap and return 0
319 * otherwise return an error
320 */
321 int
322 nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
323 {
324 struct nfsnode *np = VTONFS(vp);
325 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
326 struct vattr *vap;
327
328 mutex_enter(&np->n_attrlock);
329 if (np->n_attrstamp == 0 ||
330 (time_second - np->n_attrstamp) >= nfs_attrtimeo(nmp, np)) {
331 mutex_exit(&np->n_attrlock);
332 nfsstats.attrcache_misses++;
333 return (ENOENT);
334 }
335 nfsstats.attrcache_hits++;
336
337 vap = np->n_vattr;
338 if (vap->va_size != np->n_size) {
339 if (vap->va_type == VREG) {
340 voff_t size;
341
342 if ((np->n_flag & NMODIFIED) != 0 &&
343 vap->va_size < np->n_size) {
344 vap->va_size = np->n_size;
345 } else {
346 np->n_size = vap->va_size;
347 }
348 size = np->n_size;
349 mutex_exit(&np->n_attrlock); /* XXX */
350 genfs_node_wrlock(vp);
351 uvm_vnp_setsize(vp, size);
352 genfs_node_unlock(vp);
353 mutex_enter(&np->n_attrlock);
354 } else
355 np->n_size = vap->va_size;
356 }
357 memcpy(vaper, vap, sizeof(struct vattr));
358 if (np->n_flag & NCHG) {
359 if (np->n_flag & NACC)
360 vaper->va_atime = np->n_atim;
361 if (np->n_flag & NUPD)
362 vaper->va_mtime = np->n_mtim;
363 }
364 mutex_exit(&np->n_attrlock);
365 return (0);
366 }
367
368 void
369 nfs_delayedtruncate(struct vnode *vp)
370 {
371 struct nfsnode *np = VTONFS(vp);
372
373 if (np->n_flag & NTRUNCDELAYED) {
374 np->n_flag &= ~NTRUNCDELAYED;
375 genfs_node_wrlock(vp);
376 mutex_enter(&vp->v_interlock);
377 (void)VOP_PUTPAGES(vp, 0,
378 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES);
379 uvm_vnp_setsize(vp, np->n_size);
380 genfs_node_unlock(vp);
381 }
382 }
383
/*
 * NFS_WCCKLUDGE(nmp, now): true while the mount has NFSMNT_WCCKLUDGE set
 * in nm_iflag and `now' is less than NFS_WCCKLUDGE_TIMEOUT seconds past
 * nm_wcckludgetime.  Both arguments are evaluated exactly once.
 */
#define	NFS_WCCKLUDGE_TIMEOUT	(24 * 60 * 60)	/* 1 day */
#define	NFS_WCCKLUDGE(nmp, now) \
	((((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) != 0) && \
	 (((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0))
388
389 /*
390 * nfs_check_wccdata: check inaccurate wcc_data
391 *
392 * => return non-zero if we shouldn't trust the wcc_data.
393 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed".
394 */
395
396 int
397 nfs_check_wccdata(struct nfsnode *np, const struct timespec *ctime,
398 struct timespec *mtime, bool docheck)
399 {
400 int error = 0;
401
402 #if !defined(NFS_V2_ONLY)
403
404 if (docheck) {
405 struct vnode *vp = NFSTOV(np);
406 struct nfsmount *nmp;
407 long now = time_second;
408 const struct timespec *omtime = &np->n_vattr->va_mtime;
409 const struct timespec *octime = &np->n_vattr->va_ctime;
410 const char *reason = NULL; /* XXX: gcc */
411
412 if (timespeccmp(omtime, mtime, <=)) {
413 reason = "mtime";
414 error = EINVAL;
415 }
416
417 if (vp->v_type == VDIR && timespeccmp(octime, ctime, <=)) {
418 reason = "ctime";
419 error = EINVAL;
420 }
421
422 nmp = VFSTONFS(vp->v_mount);
423 if (error) {
424
425 /*
426 * despite of the fact that we've updated the file,
427 * timestamps of the file were not updated as we
428 * expected.
429 * it means that the server has incompatible
430 * semantics of timestamps or (more likely)
431 * the server time is not precise enough to
432 * track each modifications.
433 * in that case, we disable wcc processing.
434 *
435 * yes, strictly speaking, we should disable all
436 * caching. it's a compromise.
437 */
438
439 mutex_enter(&nmp->nm_lock);
440 if (!NFS_WCCKLUDGE(nmp, now)) {
441 printf("%s: inaccurate wcc data (%s) detected,"
442 " disabling wcc"
443 " (ctime %u.%09u %u.%09u,"
444 " mtime %u.%09u %u.%09u)\n",
445 vp->v_mount->mnt_stat.f_mntfromname,
446 reason,
447 (unsigned int)octime->tv_sec,
448 (unsigned int)octime->tv_nsec,
449 (unsigned int)ctime->tv_sec,
450 (unsigned int)ctime->tv_nsec,
451 (unsigned int)omtime->tv_sec,
452 (unsigned int)omtime->tv_nsec,
453 (unsigned int)mtime->tv_sec,
454 (unsigned int)mtime->tv_nsec);
455 }
456 nmp->nm_iflag |= NFSMNT_WCCKLUDGE;
457 nmp->nm_wcckludgetime = now;
458 mutex_exit(&nmp->nm_lock);
459 } else if (NFS_WCCKLUDGE(nmp, now)) {
460 error = EPERM; /* XXX */
461 } else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
462 mutex_enter(&nmp->nm_lock);
463 if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
464 printf("%s: re-enabling wcc\n",
465 vp->v_mount->mnt_stat.f_mntfromname);
466 nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE;
467 }
468 mutex_exit(&nmp->nm_lock);
469 }
470 }
471
472 #endif /* !defined(NFS_V2_ONLY) */
473
474 return error;
475 }
476
477 /*
478 * Heuristic to see if the server XDR encodes directory cookies or not.
479 * it is not supposed to, but a lot of servers may do this. Also, since
480 * most/all servers will implement V2 as well, it is expected that they
481 * may return just 32 bits worth of cookie information, so we need to
482 * find out in which 32 bits this information is available. We do this
483 * to avoid trouble with emulated binaries that can't handle 64 bit
484 * directory offsets.
485 */
486
487 void
488 nfs_cookieheuristic(struct vnode *vp, int *flagp, struct lwp *l, kauth_cred_t cred)
489 {
490 struct uio auio;
491 struct iovec aiov;
492 char *tbuf, *cp;
493 struct dirent *dp;
494 off_t *cookies = NULL, *cop;
495 int error, eof, nc, len;
496
497 tbuf = malloc(NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK);
498
499 aiov.iov_base = tbuf;
500 aiov.iov_len = NFS_DIRFRAGSIZ;
501 auio.uio_iov = &aiov;
502 auio.uio_iovcnt = 1;
503 auio.uio_rw = UIO_READ;
504 auio.uio_resid = NFS_DIRFRAGSIZ;
505 auio.uio_offset = 0;
506 UIO_SETUP_SYSSPACE(&auio);
507
508 error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc);
509
510 len = NFS_DIRFRAGSIZ - auio.uio_resid;
511 if (error || len == 0) {
512 free(tbuf, M_TEMP);
513 if (cookies)
514 free(cookies, M_TEMP);
515 return;
516 }
517
518 /*
519 * Find the first valid entry and look at its offset cookie.
520 */
521
522 cp = tbuf;
523 for (cop = cookies; len > 0; len -= dp->d_reclen) {
524 dp = (struct dirent *)cp;
525 if (dp->d_fileno != 0 && len >= dp->d_reclen) {
526 if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) {
527 *flagp |= NFSMNT_SWAPCOOKIE;
528 nfs_invaldircache(vp, 0);
529 nfs_vinvalbuf(vp, 0, cred, l, 1);
530 }
531 break;
532 }
533 cop++;
534 cp += dp->d_reclen;
535 }
536
537 free(tbuf, M_TEMP);
538 free(cookies, M_TEMP);
539 }
540
541 /*
542 * Set the attribute timeout based on how recently the file has been modified.
543 */
544
545 time_t
546 nfs_attrtimeo(struct nfsmount *nmp, struct nfsnode *np)
547 {
548 time_t timeo;
549
550 if ((nmp->nm_flag & NFSMNT_NOAC) != 0)
551 return 0;
552
553 if (((np)->n_flag & NMODIFIED) != 0)
554 return NFS_MINATTRTIMO;
555
556 timeo = (time_second - np->n_mtime.tv_sec) / 10;
557 timeo = max(timeo, NFS_MINATTRTIMO);
558 timeo = min(timeo, NFS_MAXATTRTIMO);
559 return timeo;
560 }
561