vfs_getcwd.c revision 1.47 1 /* $NetBSD: vfs_getcwd.c,v 1.47 2010/11/30 10:30:02 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Bill Sommerfeld.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.47 2010/11/30 10:30:02 dholland Exp $");
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/namei.h>
38 #include <sys/filedesc.h>
39 #include <sys/kernel.h>
40 #include <sys/file.h>
41 #include <sys/stat.h>
42 #include <sys/vnode.h>
43 #include <sys/mount.h>
44 #include <sys/proc.h>
45 #include <sys/uio.h>
46 #include <sys/kmem.h>
47 #include <sys/dirent.h>
48 #include <sys/kauth.h>
49
50 #include <ufs/ufs/dir.h> /* XXX only for DIRBLKSIZ */
51
52 #include <sys/syscallargs.h>
53
54 /*
55 * Vnode variable naming conventions in this file:
56 *
57 * rvp: the current root we're aiming towards.
58 * lvp, *lvpp: the "lower" vnode
59 * uvp, *uvpp: the "upper" vnode.
60 *
61 * Since all the vnodes we're dealing with are directories, and the
62 * lookups are going *up* in the filesystem rather than *down*, the
63 * usual "pvp" (parent) or "dvp" (directory) naming conventions are
64 * too confusing.
65 */
66
67 /*
68 * XXX Will infinite loop in certain cases if a directory read reliably
69 * returns EINVAL on last block.
70 * XXX is EINVAL the right thing to return if a directory is malformed?
71 */
72
73 /*
74 * XXX Untested vs. mount -o union; probably does the wrong thing.
75 */
76
77 /*
78 * Find parent vnode of *lvpp, return in *uvpp
79 *
80 * If we care about the name, scan it looking for name of directory
81 * entry pointing at lvp.
82 *
83 * Place the name in the buffer which starts at bufp, immediately
84 * before *bpp, and move bpp backwards to point at the start of it.
85 *
86 * On entry, *lvpp is a locked vnode reference; on exit, it is vput and NULL'ed
87 * On exit, *uvpp is either NULL or is a locked vnode reference.
88 */
89 static int
90 getcwd_scandir(struct vnode **lvpp, struct vnode **uvpp, char **bpp,
91 char *bufp, struct lwp *l)
92 {
93 int error = 0;
94 int eofflag;
95 off_t off;
96 int tries;
97 struct uio uio;
98 struct iovec iov;
99 char *dirbuf = NULL;
100 int dirbuflen;
101 ino_t fileno;
102 struct vattr va;
103 struct vnode *uvp = NULL;
104 struct vnode *lvp = *lvpp;
105 kauth_cred_t cred = l->l_cred;
106 struct componentname cn;
107 int len, reclen;
108 tries = 0;
109
110 /*
111 * If we want the filename, get some info we need while the
112 * current directory is still locked.
113 */
114 if (bufp != NULL) {
115 error = VOP_GETATTR(lvp, &va, cred);
116 if (error) {
117 vput(lvp);
118 *lvpp = NULL;
119 *uvpp = NULL;
120 return error;
121 }
122 }
123
124 /*
125 * Ok, we have to do it the hard way..
126 * Next, get parent vnode using lookup of ..
127 */
128 cn.cn_nameiop = LOOKUP;
129 cn.cn_flags = ISLASTCN | ISDOTDOT | RDONLY;
130 cn.cn_cred = cred;
131 cn.cn_nameptr = "..";
132 cn.cn_namelen = 2;
133 cn.cn_hash = 0;
134 cn.cn_consume = 0;
135
136 /*
137 * At this point, lvp is locked.
138 * On successful return, *uvpp will be locked
139 */
140 error = VOP_LOOKUP(lvp, uvpp, &cn);
141 vput(lvp);
142 if (error) {
143 *lvpp = NULL;
144 *uvpp = NULL;
145 return error;
146 }
147 uvp = *uvpp;
148
149 /* If we don't care about the pathname, we're done */
150 if (bufp == NULL) {
151 *lvpp = NULL;
152 return 0;
153 }
154
155 fileno = va.va_fileid;
156
157 dirbuflen = DIRBLKSIZ;
158 if (dirbuflen < va.va_blocksize)
159 dirbuflen = va.va_blocksize;
160 dirbuf = kmem_alloc(dirbuflen, KM_SLEEP);
161
162 #if 0
163 unionread:
164 #endif
165 off = 0;
166 do {
167 /* call VOP_READDIR of parent */
168 iov.iov_base = dirbuf;
169 iov.iov_len = dirbuflen;
170
171 uio.uio_iov = &iov;
172 uio.uio_iovcnt = 1;
173 uio.uio_offset = off;
174 uio.uio_resid = dirbuflen;
175 uio.uio_rw = UIO_READ;
176 UIO_SETUP_SYSSPACE(&uio);
177
178 eofflag = 0;
179
180 error = VOP_READDIR(uvp, &uio, cred, &eofflag, 0, 0);
181
182 off = uio.uio_offset;
183
184 /*
185 * Try again if NFS tosses its cookies.
186 * XXX this can still loop forever if the directory is busted
187 * such that the second or subsequent page of it always
188 * returns EINVAL
189 */
190 if ((error == EINVAL) && (tries < 3)) {
191 off = 0;
192 tries++;
193 continue; /* once more, with feeling */
194 }
195
196 if (!error) {
197 char *cpos;
198 struct dirent *dp;
199
200 cpos = dirbuf;
201 tries = 0;
202
203 /* scan directory page looking for matching vnode */
204 for (len = (dirbuflen - uio.uio_resid); len > 0;
205 len -= reclen) {
206 dp = (struct dirent *) cpos;
207 reclen = dp->d_reclen;
208
209 /* check for malformed directory.. */
210 if (reclen < _DIRENT_MINSIZE(dp)) {
211 error = EINVAL;
212 goto out;
213 }
214 /*
215 * XXX should perhaps do VOP_LOOKUP to
216 * check that we got back to the right place,
217 * but getting the locking games for that
218 * right would be heinous.
219 */
220 if ((dp->d_type != DT_WHT) &&
221 (dp->d_fileno == fileno)) {
222 char *bp = *bpp;
223
224 bp -= dp->d_namlen;
225 if (bp <= bufp) {
226 error = ERANGE;
227 goto out;
228 }
229 memcpy(bp, dp->d_name, dp->d_namlen);
230 error = 0;
231 *bpp = bp;
232 goto out;
233 }
234 cpos += reclen;
235 }
236 } else
237 goto out;
238 } while (!eofflag);
239 #if 0
240 /*
241 * Deal with mount -o union, which unions only the
242 * root directory of the mount.
243 */
244 if ((uvp->v_vflag & VV_ROOT) &&
245 (uvp->v_mount->mnt_flag & MNT_UNION)) {
246 struct vnode *tvp = uvp;
247
248 uvp = uvp->v_mount->mnt_vnodecovered;
249 vput(tvp);
250 vref(uvp);
251 *uvpp = uvp;
252 vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY);
253 goto unionread;
254 }
255 #endif
256 error = ENOENT;
257
258 out:
259 *lvpp = NULL;
260 kmem_free(dirbuf, dirbuflen);
261 return error;
262 }
263
264 /*
265 * Look in the vnode-to-name reverse cache to see if
266 * we can find things the easy way.
267 *
268 * XXX vget failure path is untested.
269 *
270 * On entry, *lvpp is a locked vnode reference.
271 * On exit, one of the following is the case:
272 * 0) Both *lvpp and *uvpp are NULL and failure is returned.
273 * 1) *uvpp is NULL, *lvpp remains locked and -1 is returned (cache miss)
274 * 2) *uvpp is a locked vnode reference, *lvpp is vput and NULL'ed
275 * and 0 is returned (cache hit)
276 */
277
278 static int
279 getcwd_getcache(struct vnode **lvpp, struct vnode **uvpp, char **bpp,
280 char *bufp)
281 {
282 struct vnode *lvp, *uvp = NULL;
283 int error;
284
285 lvp = *lvpp;
286
287 /*
288 * This returns 0 on a cache hit, -1 on a clean cache miss,
289 * or an errno on other failure.
290 */
291 error = cache_revlookup(lvp, uvpp, bpp, bufp);
292 if (error) {
293 if (error != -1) {
294 vput(lvp);
295 *lvpp = NULL;
296 *uvpp = NULL;
297 }
298 return error;
299 }
300 uvp = *uvpp;
301
302 /*
303 * Since we're going up, we have to release the current lock
304 * before we take the parent lock.
305 */
306
307 VOP_UNLOCK(lvp);
308 vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY);
309 vrele(lvp);
310 *lvpp = NULL;
311
312 return error;
313 }
314
315 /*
316 * common routine shared by sys___getcwd() and vn_isunder()
317 */
318
319 int
320 getcwd_common(struct vnode *lvp, struct vnode *rvp, char **bpp, char *bufp,
321 int limit, int flags, struct lwp *l)
322 {
323 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
324 kauth_cred_t cred = l->l_cred;
325 struct vnode *uvp = NULL;
326 char *bp = NULL;
327 int error;
328 int perms = VEXEC;
329
330 error = 0;
331 if (rvp == NULL) {
332 rvp = cwdi->cwdi_rdir;
333 if (rvp == NULL)
334 rvp = rootvnode;
335 }
336
337 vref(rvp);
338 vref(lvp);
339
340 /*
341 * Error handling invariant:
342 * Before a `goto out':
343 * lvp is either NULL, or locked and held.
344 * uvp is either NULL, or locked and held.
345 */
346
347 vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
348 if (bufp)
349 bp = *bpp;
350
351 /*
352 * this loop will terminate when one of the following happens:
353 * - we hit the root
354 * - getdirentries or lookup fails
355 * - we run out of space in the buffer.
356 */
357 if (lvp == rvp) {
358 if (bp)
359 *(--bp) = '/';
360 goto out;
361 }
362 do {
363 /*
364 * access check here is optional, depending on
365 * whether or not caller cares.
366 */
367 if (flags & GETCWD_CHECK_ACCESS) {
368 error = VOP_ACCESS(lvp, perms, cred);
369 if (error)
370 goto out;
371 perms = VEXEC|VREAD;
372 }
373
374 /*
375 * step up if we're a covered vnode..
376 */
377 while (lvp->v_vflag & VV_ROOT) {
378 struct vnode *tvp;
379
380 if (lvp == rvp)
381 goto out;
382
383 tvp = lvp;
384 lvp = lvp->v_mount->mnt_vnodecovered;
385 vput(tvp);
386 /*
387 * hodie natus est radici frater
388 */
389 if (lvp == NULL) {
390 error = ENOENT;
391 goto out;
392 }
393 vref(lvp);
394 error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
395 if (error != 0) {
396 vrele(lvp);
397 lvp = NULL;
398 goto out;
399 }
400 }
401 /*
402 * Look in the name cache; if that fails, look in the
403 * directory..
404 */
405 error = getcwd_getcache(&lvp, &uvp, &bp, bufp);
406 if (error == -1) {
407 if (lvp->v_type != VDIR) {
408 error = ENOTDIR;
409 goto out;
410 }
411 error = getcwd_scandir(&lvp, &uvp, &bp, bufp, l);
412 }
413 if (error)
414 goto out;
415 #if DIAGNOSTIC
416 if (lvp != NULL)
417 panic("getcwd: oops, forgot to null lvp");
418 if (bufp && (bp <= bufp)) {
419 panic("getcwd: oops, went back too far");
420 }
421 #endif
422 if (bp)
423 *(--bp) = '/';
424 lvp = uvp;
425 uvp = NULL;
426 limit--;
427 } while ((lvp != rvp) && (limit > 0));
428
429 out:
430 if (bpp)
431 *bpp = bp;
432 if (uvp)
433 vput(uvp);
434 if (lvp)
435 vput(lvp);
436 vrele(rvp);
437 return error;
438 }
439
440 /*
441 * Check if one directory can be found inside another in the directory
442 * hierarchy.
443 *
444 * Intended to be used in chroot, chdir, fchdir, etc., to ensure that
445 * chroot() actually means something.
446 */
447 int
448 vn_isunder(struct vnode *lvp, struct vnode *rvp, struct lwp *l)
449 {
450 int error;
451
452 error = getcwd_common(lvp, rvp, NULL, NULL, MAXPATHLEN / 2, 0, l);
453
454 if (!error)
455 return 1;
456 else
457 return 0;
458 }
459
460 /*
461 * Returns true if proc p1's root directory equal to or under p2's
462 * root directory.
463 *
464 * Intended to be used from ptrace/procfs sorts of things.
465 */
466
467 int
468 proc_isunder(struct proc *p1, struct lwp *l2)
469 {
470 struct vnode *r1 = p1->p_cwdi->cwdi_rdir;
471 struct vnode *r2 = l2->l_proc->p_cwdi->cwdi_rdir;
472
473 if (r1 == NULL)
474 return (r2 == NULL);
475 else if (r2 == NULL)
476 return 1;
477 else
478 return vn_isunder(r1, r2, l2);
479 }
480
481 /*
482 * Find pathname of process's current directory.
483 *
484 * Use vfs vnode-to-name reverse cache; if that fails, fall back
485 * to reading directory contents.
486 */
487
488 int
489 sys___getcwd(struct lwp *l, const struct sys___getcwd_args *uap, register_t *retval)
490 {
491 /* {
492 syscallarg(char *) bufp;
493 syscallarg(size_t) length;
494 } */
495
496 int error;
497 char *path;
498 char *bp, *bend;
499 int len = SCARG(uap, length);
500 int lenused;
501 struct cwdinfo *cwdi;
502
503 if (len > MAXPATHLEN * 4)
504 len = MAXPATHLEN * 4;
505 else if (len < 2)
506 return ERANGE;
507
508 path = kmem_alloc(len, KM_SLEEP);
509 if (!path)
510 return ENOMEM;
511
512 bp = &path[len];
513 bend = bp;
514 *(--bp) = '\0';
515
516 /*
517 * 5th argument here is "max number of vnodes to traverse".
518 * Since each entry takes up at least 2 bytes in the output buffer,
519 * limit it to N/2 vnodes for an N byte buffer.
520 */
521 cwdi = l->l_proc->p_cwdi;
522 rw_enter(&cwdi->cwdi_lock, RW_READER);
523 error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path,
524 len/2, GETCWD_CHECK_ACCESS, l);
525 rw_exit(&cwdi->cwdi_lock);
526
527 if (error)
528 goto out;
529 lenused = bend - bp;
530 *retval = lenused;
531 /* put the result into user buffer */
532 error = copyout(bp, SCARG(uap, bufp), lenused);
533
534 out:
535 kmem_free(path, len);
536 return error;
537 }
538
539 /*
540 * Try to find a pathname for a vnode. Since there is no mapping
541 * vnode -> parent directory, this needs the NAMECACHE_ENTER_REVERSE
542 * option to work (to make cache_revlookup succeed). Caller holds a
543 * reference to the vnode.
544 */
545 int
546 vnode_to_path(char *path, size_t len, struct vnode *vp, struct lwp *curl,
547 struct proc *p)
548 {
549 struct proc *curp = curl->l_proc;
550 int error, lenused, elen;
551 char *bp, *bend;
552 struct vnode *dvp;
553
554 KASSERT(vp->v_usecount > 0);
555
556 bp = bend = &path[len];
557 *(--bp) = '\0';
558
559 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
560 if (error != 0)
561 return error;
562 error = cache_revlookup(vp, &dvp, &bp, path);
563 VOP_UNLOCK(vp);
564 if (error != 0)
565 return (error == -1 ? ENOENT : error);
566
567 *(--bp) = '/';
568 error = getcwd_common(dvp, NULL, &bp, path, len / 2,
569 GETCWD_CHECK_ACCESS, curl);
570 vrele(dvp);
571
572 /*
573 * Strip off emulation path for emulated processes looking at
574 * the maps file of a process of the same emulation. (Won't
575 * work if /emul/xxx is a symlink..)
576 */
577 if (curp->p_emul == p->p_emul && curp->p_emul->e_path != NULL) {
578 elen = strlen(curp->p_emul->e_path);
579 if (!strncmp(bp, curp->p_emul->e_path, elen))
580 bp = &bp[elen];
581 }
582
583 lenused = bend - bp;
584
585 memcpy(path, bp, lenused);
586 path[lenused] = 0;
587
588 return 0;
589 }
590