tmpfs_subr.c revision 1.34.4.6 1 /* $NetBSD: tmpfs_subr.c,v 1.34.4.6 2007/09/16 18:32:36 ad Exp $ */
2
3 /*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Efficient memory file system supporting functions.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.34.4.6 2007/09/16 18:32:36 ad Exp $");
46
47 #include <sys/param.h>
48 #include <sys/dirent.h>
49 #include <sys/event.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/namei.h>
53 #include <sys/time.h>
54 #include <sys/stat.h>
55 #include <sys/systm.h>
56 #include <sys/swap.h>
57 #include <sys/vnode.h>
58 #include <sys/kauth.h>
59 #include <sys/proc.h>
60
61 #include <uvm/uvm.h>
62
63 #include <miscfs/specfs/specdev.h>
64 #include <fs/tmpfs/tmpfs.h>
65 #include <fs/tmpfs/tmpfs_fifoops.h>
66 #include <fs/tmpfs/tmpfs_specops.h>
67 #include <fs/tmpfs/tmpfs_vnops.h>
68
69 /* --------------------------------------------------------------------- */
70
71 /*
72 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
73 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
74 * using the credentials of the process 'p'.
75 *
76 * If the node type is set to 'VDIR', then the parent parameter must point
77 * to the parent directory of the node being created. It may only be NULL
78 * while allocating the root node.
79 *
80 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
81 * specifies the device the node represents.
82 *
83 * If the node type is set to 'VLNK', then the parameter target specifies
84 * the file name of the target file for the symbolic link that is being
85 * created.
86 *
87 * Note that new nodes are retrieved from the available list if it has
88 * items or, if it is empty, from the node pool as long as there is enough
89 * space to create them.
90 *
91 * Returns zero on success or an appropriate error code on failure.
92 */
93 int
94 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
95 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
96 char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
97 {
98 struct tmpfs_node *nnode;
99 ino_t ino;
100
101 /* If the root directory of the 'tmp' file system is not yet
102 * allocated, this must be the request to do it. */
103 KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
104
105 KASSERT(IFF(type == VLNK, target != NULL));
106 KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
107
108 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
109
110 nnode = NULL;
111 mutex_enter(&tmp->tm_lock);
112 if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
113 KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
114 if (tmp->tm_nodes_last == tmp->tm_nodes_max) {
115 mutex_exit(&tmp->tm_lock);
116 return ENOSPC;
117 }
118 ino = tmp->tm_nodes_last++;
119 mutex_exit(&tmp->tm_lock);
120
121 nnode =
122 (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
123 if (nnode == NULL) {
124 mutex_enter(&tmp->tm_lock);
125 if (ino == tmp->tm_nodes_last - 1)
126 tmp->tm_nodes_last--;
127 else {
128 /* XXX Oops, just threw away inode number */
129 }
130 mutex_exit(&tmp->tm_lock);
131 return ENOSPC;
132 }
133 nnode->tn_id = ino;
134 nnode->tn_gen = arc4random();
135 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);
136
137 mutex_enter(&tmp->tm_lock);
138 } else {
139 nnode = LIST_FIRST(&tmp->tm_nodes_avail);
140 LIST_REMOVE(nnode, tn_entries);
141 nnode->tn_gen++;
142 }
143 KASSERT(nnode != NULL);
144 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
145 mutex_exit(&tmp->tm_lock);
146
147 /* Generic initialization. */
148 nnode->tn_type = type;
149 nnode->tn_size = 0;
150 nnode->tn_status = 0;
151 nnode->tn_flags = 0;
152 nnode->tn_links = 0;
153 getnanotime(&nnode->tn_atime);
154 nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
155 nnode->tn_atime;
156 nnode->tn_uid = uid;
157 nnode->tn_gid = gid;
158 nnode->tn_mode = mode;
159 nnode->tn_lockf = NULL;
160 nnode->tn_vnode = NULL;
161
162 /* Type-specific initialization. */
163 switch (nnode->tn_type) {
164 case VBLK:
165 case VCHR:
166 nnode->tn_spec.tn_dev.tn_rdev = rdev;
167 break;
168
169 case VDIR:
170 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
171 nnode->tn_spec.tn_dir.tn_parent =
172 (parent == NULL) ? nnode : parent;
173 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
174 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
175 nnode->tn_links++;
176 nnode->tn_spec.tn_dir.tn_parent->tn_links++;
177 if (parent != NULL) {
178 KASSERT(parent->tn_vnode != NULL);
179 VN_KNOTE(parent->tn_vnode, NOTE_LINK);
180 }
181 break;
182
183 case VFIFO:
184 /* FALLTHROUGH */
185 case VSOCK:
186 break;
187
188 case VLNK:
189 KASSERT(strlen(target) < MAXPATHLEN);
190 nnode->tn_size = strlen(target);
191 nnode->tn_spec.tn_lnk.tn_link =
192 tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0);
193 if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
194 nnode->tn_type = VNON;
195 tmpfs_free_node(tmp, nnode);
196 return ENOSPC;
197 }
198 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
199 break;
200
201 case VREG:
202 nnode->tn_spec.tn_reg.tn_aobj =
203 uao_create(INT32_MAX - PAGE_SIZE, 0);
204 nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
205 break;
206
207 default:
208 KASSERT(0);
209 }
210
211 *node = nnode;
212 return 0;
213 }
214
215 /* --------------------------------------------------------------------- */
216
217 /*
218 * Destroys the node pointed to by node from the file system 'tmp'.
219 * If the node does not belong to the given mount point, the results are
220 * unpredicted.
221 *
222 * If the node references a directory; no entries are allowed because
223 * their removal could need a recursive algorithm, something forbidden in
224 * kernel space. Furthermore, there is not need to provide such
225 * functionality (recursive removal) because the only primitives offered
226 * to the user are the removal of empty directories and the deletion of
227 * individual files.
228 *
229 * Note that nodes are not really deleted; in fact, when a node has been
230 * allocated, it cannot be deleted during the whole life of the file
231 * system. Instead, they are moved to the available list and remain there
232 * until reused.
233 */
234 void
235 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
236 {
237 size_t pages;
238
239 if (node->tn_type == VREG)
240 pages = node->tn_spec.tn_reg.tn_aobj_pages;
241 else
242 pages = 0;
243
244 mutex_enter(&tmp->tm_lock);
245 tmp->tm_pages_used -= pages;
246 LIST_REMOVE(node, tn_entries);
247 mutex_exit(&tmp->tm_lock);
248
249 switch (node->tn_type) {
250 case VLNK:
251 tmpfs_str_pool_put(&tmp->tm_str_pool,
252 node->tn_spec.tn_lnk.tn_link, node->tn_size);
253 break;
254
255 case VREG:
256 if (node->tn_spec.tn_reg.tn_aobj != NULL)
257 uao_detach(node->tn_spec.tn_reg.tn_aobj);
258 break;
259
260 default:
261 break;
262 }
263
264 mutex_enter(&tmp->tm_lock);
265 node->tn_type = VNON;
266 LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
267 mutex_exit(&tmp->tm_lock);
268 }
269
270 /* --------------------------------------------------------------------- */
271
272 /*
273 * Allocates a new directory entry for the node node with a name of name.
274 * The new directory entry is returned in *de.
275 *
276 * The link count of node is increased by one to reflect the new object
277 * referencing it. This takes care of notifying kqueue listeners about
278 * this change.
279 *
280 * Returns zero on success or an appropriate error code on failure.
281 */
282 int
283 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
284 const char *name, uint16_t len, struct tmpfs_dirent **de)
285 {
286 struct tmpfs_dirent *nde;
287
288 nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
289 if (nde == NULL)
290 return ENOSPC;
291
292 nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
293 if (nde->td_name == NULL) {
294 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
295 return ENOSPC;
296 }
297 nde->td_namelen = len;
298 memcpy(nde->td_name, name, len);
299 nde->td_node = node;
300
301 node->tn_links++;
302 if (node->tn_links > 1 && node->tn_vnode != NULL)
303 VN_KNOTE(node->tn_vnode, NOTE_LINK);
304 *de = nde;
305
306 return 0;
307 }
308
309 /* --------------------------------------------------------------------- */
310
311 /*
312 * Frees a directory entry. It is the caller's responsibility to destroy
313 * the node referenced by it if needed.
314 *
315 * The link count of node is decreased by one to reflect the removal of an
316 * object that referenced it. This only happens if 'node_exists' is true;
317 * otherwise the function will not access the node referred to by the
318 * directory entry, as it may already have been released from the outside.
319 *
320 * Interested parties (kqueue) are notified of the link count change; note
321 * that this can include both the node pointed to by the directory entry
322 * as well as its parent.
323 */
324 void
325 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
326 bool node_exists)
327 {
328 if (node_exists) {
329 struct tmpfs_node *node;
330
331 node = de->td_node;
332
333 KASSERT(node->tn_links > 0);
334 node->tn_links--;
335 if (node->tn_vnode != NULL)
336 VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
337 NOTE_DELETE : NOTE_LINK);
338 if (node->tn_type == VDIR)
339 VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
340 NOTE_LINK);
341 }
342
343 tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
344 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
345 }
346
347 /* --------------------------------------------------------------------- */
348
349 /*
350 * Allocates a new vnode for the node node or returns a new reference to
351 * an existing one if the node had already a vnode referencing it. The
352 * resulting locked vnode is returned in *vpp.
353 *
354 * Returns zero on success or an appropriate error code on failure.
355 */
356 int
357 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
358 {
359 int error;
360 struct vnode *nvp;
361 struct vnode *vp;
362
363 mutex_enter(&node->tn_vlock);
364 if ((vp = node->tn_vnode) != NULL) {
365 mutex_enter(&vp->v_interlock);
366 mutex_exit(&node->tn_vlock);
367 vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK);
368 *vpp = vp;
369 return 0;
370 }
371
372 /* Get a new vnode and associate it with our node. */
373 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
374 if (error != 0) {
375 mutex_exit(&node->tn_vlock);
376 return error;
377 }
378
379 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
380 if (error != 0) {
381 mutex_exit(&node->tn_vlock);
382 ungetnewvnode(vp);
383 return error;
384 }
385
386 vp->v_type = node->tn_type;
387
388 /* Type-specific initialization. */
389 switch (node->tn_type) {
390 case VBLK:
391 /* FALLTHROUGH */
392 case VCHR:
393 vp->v_op = tmpfs_specop_p;
394 nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp);
395 if (nvp != NULL) {
396 /* Discard unneeded vnode, but save its inode. */
397 nvp->v_data = node;
398
399 /* XXX spec_vnodeops has no locking, so we have to
400 * do it explicitly. */
401 VOP_UNLOCK(vp, 0);
402 vp->v_op = spec_vnodeop_p;
403 mutex_enter(&vp->v_interlock);
404 vp->v_iflag &= ~VI_LOCKSWORK;
405 mutex_exit(&vp->v_interlock);
406 vgone(vp);
407
408 /* Reinitialize aliased node. */
409 vp = nvp;
410 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
411 if (error != 0) {
412 mutex_exit(&node->tn_vlock);
413 return error;
414 }
415 }
416 break;
417
418 case VDIR:
419 vp->v_vflag |= (node->tn_spec.tn_dir.tn_parent == node ? VV_ROOT : 0);
420 break;
421
422 case VFIFO:
423 vp->v_op = tmpfs_fifoop_p;
424 break;
425
426 case VLNK:
427 /* FALLTHROUGH */
428 case VREG:
429 /* FALLTHROUGH */
430 case VSOCK:
431 break;
432
433 default:
434 KASSERT(0);
435 }
436
437 uvm_vnp_setsize(vp, node->tn_size);
438 vp->v_data = node;
439 node->tn_vnode = vp;
440 mutex_exit(&node->tn_vlock);
441 *vpp = vp;
442
443 KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
444
445 return error;
446 }
447
448 /* --------------------------------------------------------------------- */
449
450 /*
451 * Destroys the association between the vnode vp and the node it
452 * references.
453 */
454 void
455 tmpfs_free_vp(struct vnode *vp)
456 {
457 struct tmpfs_node *node;
458
459 node = VP_TO_TMPFS_NODE(vp);
460
461 mutex_enter(&node->tn_vlock);
462 node->tn_vnode = NULL;
463 mutex_exit(&node->tn_vlock);
464 vp->v_data = NULL;
465 }
466
467 /* --------------------------------------------------------------------- */
468
469 /*
470 * Allocates a new file of type 'type' and adds it to the parent directory
471 * 'dvp'; this addition is done using the component name given in 'cnp'.
472 * The ownership of the new file is automatically assigned based on the
473 * credentials of the caller (through 'cnp'), the group is set based on
474 * the parent directory and the mode is determined from the 'vap' argument.
475 * If successful, *vpp holds a vnode to the newly created file and zero
476 * is returned. Otherwise *vpp is NULL and the function returns an
477 * appropriate error code.
478 */
479 int
480 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
481 struct componentname *cnp, char *target)
482 {
483 int error;
484 struct tmpfs_dirent *de;
485 struct tmpfs_mount *tmp;
486 struct tmpfs_node *dnode;
487 struct tmpfs_node *node;
488 struct tmpfs_node *parent;
489
490 KASSERT(VOP_ISLOCKED(dvp));
491 KASSERT(cnp->cn_flags & HASBUF);
492
493 tmp = VFS_TO_TMPFS(dvp->v_mount);
494 dnode = VP_TO_TMPFS_DIR(dvp);
495 *vpp = NULL;
496
497 /* If the entry we are creating is a directory, we cannot overflow
498 * the number of links of its parent, because it will get a new
499 * link. */
500 if (vap->va_type == VDIR) {
501 /* Ensure that we do not overflow the maximum number of links
502 * imposed by the system. */
503 KASSERT(dnode->tn_links <= LINK_MAX);
504 if (dnode->tn_links == LINK_MAX) {
505 error = EMLINK;
506 goto out;
507 }
508
509 parent = dnode;
510 } else
511 parent = NULL;
512
513 /* Allocate a node that represents the new file. */
514 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
515 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
516 cnp->cn_lwp->l_proc, &node);
517 if (error != 0)
518 goto out;
519
520 /* Allocate a directory entry that points to the new file. */
521 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
522 &de);
523 if (error != 0) {
524 tmpfs_free_node(tmp, node);
525 goto out;
526 }
527
528 /* Allocate a vnode for the new file. */
529 error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
530 if (error != 0) {
531 tmpfs_free_dirent(tmp, de, true);
532 tmpfs_free_node(tmp, node);
533 goto out;
534 }
535
536 /* Now that all required items are allocated, we can proceed to
537 * insert the new node into the directory, an operation that
538 * cannot fail. */
539 tmpfs_dir_attach(dvp, de);
540
541 out:
542 if (error != 0 || !(cnp->cn_flags & SAVESTART))
543 PNBUF_PUT(cnp->cn_pnbuf);
544 vput(dvp);
545
546 KASSERT(IFF(error == 0, *vpp != NULL));
547
548 return error;
549 }
550
551 /* --------------------------------------------------------------------- */
552
553 /*
554 * Attaches the directory entry de to the directory represented by vp.
555 * Note that this does not change the link count of the node pointed by
556 * the directory entry, as this is done by tmpfs_alloc_dirent.
557 *
558 * As the "parent" directory changes, interested parties are notified of
559 * a write to it.
560 */
561 void
562 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
563 {
564 struct tmpfs_node *dnode;
565
566 dnode = VP_TO_TMPFS_DIR(vp);
567
568 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
569 dnode->tn_size += sizeof(struct tmpfs_dirent);
570 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
571 TMPFS_NODE_MODIFIED;
572 uvm_vnp_setsize(vp, dnode->tn_size);
573
574 VN_KNOTE(vp, NOTE_WRITE);
575 }
576
577 /* --------------------------------------------------------------------- */
578
579 /*
580 * Detaches the directory entry de from the directory represented by vp.
581 * Note that this does not change the link count of the node pointed by
582 * the directory entry, as this is done by tmpfs_free_dirent.
583 *
584 * As the "parent" directory changes, interested parties are notified of
585 * a write to it.
586 */
587 void
588 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
589 {
590 struct tmpfs_node *dnode;
591
592 KASSERT(VOP_ISLOCKED(vp));
593
594 dnode = VP_TO_TMPFS_DIR(vp);
595
596 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
597 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
598 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
599 }
600
601 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
602 dnode->tn_size -= sizeof(struct tmpfs_dirent);
603 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
604 TMPFS_NODE_MODIFIED;
605 uvm_vnp_setsize(vp, dnode->tn_size);
606
607 VN_KNOTE(vp, NOTE_WRITE);
608 }
609
610 /* --------------------------------------------------------------------- */
611
612 /*
613 * Looks for a directory entry in the directory represented by node.
614 * 'cnp' describes the name of the entry to look for. Note that the .
615 * and .. components are not allowed as they do not physically exist
616 * within directories.
617 *
618 * Returns a pointer to the entry when found, otherwise NULL.
619 */
620 struct tmpfs_dirent *
621 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
622 {
623 struct tmpfs_dirent *de;
624
625 KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
626 KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
627 cnp->cn_nameptr[1] == '.')));
628 TMPFS_VALIDATE_DIR(node);
629
630 node->tn_status |= TMPFS_NODE_ACCESSED;
631
632 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
633 KASSERT(cnp->cn_namelen < 0xffff);
634 if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
635 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0)
636 break;
637 }
638
639 return de;
640 }
641
642 /* --------------------------------------------------------------------- */
643
644 /*
645 * Helper function for tmpfs_readdir. Creates a '.' entry for the given
646 * directory and returns it in the uio space. The function returns 0
647 * on success, -1 if there was not enough space in the uio structure to
648 * hold the directory entry or an appropriate error code if another
649 * error happens.
650 */
651 int
652 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
653 {
654 int error;
655 struct dirent dent;
656
657 TMPFS_VALIDATE_DIR(node);
658 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
659
660 dent.d_fileno = node->tn_id;
661 dent.d_type = DT_DIR;
662 dent.d_namlen = 1;
663 dent.d_name[0] = '.';
664 dent.d_name[1] = '\0';
665 dent.d_reclen = _DIRENT_SIZE(&dent);
666
667 if (dent.d_reclen > uio->uio_resid)
668 error = -1;
669 else {
670 error = uiomove(&dent, dent.d_reclen, uio);
671 if (error == 0)
672 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
673 }
674
675 node->tn_status |= TMPFS_NODE_ACCESSED;
676
677 return error;
678 }
679
680 /* --------------------------------------------------------------------- */
681
682 /*
683 * Helper function for tmpfs_readdir. Creates a '..' entry for the given
684 * directory and returns it in the uio space. The function returns 0
685 * on success, -1 if there was not enough space in the uio structure to
686 * hold the directory entry or an appropriate error code if another
687 * error happens.
688 */
689 int
690 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
691 {
692 int error;
693 struct dirent dent;
694
695 TMPFS_VALIDATE_DIR(node);
696 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
697
698 dent.d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
699 dent.d_type = DT_DIR;
700 dent.d_namlen = 2;
701 dent.d_name[0] = '.';
702 dent.d_name[1] = '.';
703 dent.d_name[2] = '\0';
704 dent.d_reclen = _DIRENT_SIZE(&dent);
705
706 if (dent.d_reclen > uio->uio_resid)
707 error = -1;
708 else {
709 error = uiomove(&dent, dent.d_reclen, uio);
710 if (error == 0) {
711 struct tmpfs_dirent *de;
712
713 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
714 if (de == NULL)
715 uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
716 else
717 uio->uio_offset = tmpfs_dircookie(de);
718 }
719 }
720
721 node->tn_status |= TMPFS_NODE_ACCESSED;
722
723 return error;
724 }
725
726 /* --------------------------------------------------------------------- */
727
728 /*
729 * Lookup a directory entry by its associated cookie.
730 */
731 struct tmpfs_dirent *
732 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
733 {
734 struct tmpfs_dirent *de;
735
736 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
737 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
738 return node->tn_spec.tn_dir.tn_readdir_lastp;
739 }
740
741 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
742 if (tmpfs_dircookie(de) == cookie) {
743 break;
744 }
745 }
746
747 return de;
748 }
749
750 /* --------------------------------------------------------------------- */
751
752 /*
753 * Helper function for tmpfs_readdir. Returns as much directory entries
754 * as can fit in the uio space. The read starts at uio->uio_offset.
755 * The function returns 0 on success, -1 if there was not enough space
756 * in the uio structure to hold the directory entry or an appropriate
757 * error code if another error happens.
758 */
759 int
760 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
761 {
762 int error;
763 off_t startcookie;
764 struct tmpfs_dirent *de;
765
766 TMPFS_VALIDATE_DIR(node);
767
768 /* Locate the first directory entry we have to return. We have cached
769 * the last readdir in the node, so use those values if appropriate.
770 * Otherwise do a linear scan to find the requested entry. */
771 startcookie = uio->uio_offset;
772 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
773 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
774 if (startcookie == TMPFS_DIRCOOKIE_EOF) {
775 return 0;
776 } else {
777 de = tmpfs_dir_lookupbycookie(node, startcookie);
778 }
779 if (de == NULL) {
780 return EINVAL;
781 }
782
783 /* Read as much entries as possible; i.e., until we reach the end of
784 * the directory or we exhaust uio space. */
785 do {
786 struct dirent d;
787
788 /* Create a dirent structure representing the current
789 * tmpfs_node and fill it. */
790 d.d_fileno = de->td_node->tn_id;
791 switch (de->td_node->tn_type) {
792 case VBLK:
793 d.d_type = DT_BLK;
794 break;
795
796 case VCHR:
797 d.d_type = DT_CHR;
798 break;
799
800 case VDIR:
801 d.d_type = DT_DIR;
802 break;
803
804 case VFIFO:
805 d.d_type = DT_FIFO;
806 break;
807
808 case VLNK:
809 d.d_type = DT_LNK;
810 break;
811
812 case VREG:
813 d.d_type = DT_REG;
814 break;
815
816 case VSOCK:
817 d.d_type = DT_SOCK;
818 break;
819
820 default:
821 KASSERT(0);
822 }
823 d.d_namlen = de->td_namelen;
824 KASSERT(de->td_namelen < sizeof(d.d_name));
825 (void)memcpy(d.d_name, de->td_name, de->td_namelen);
826 d.d_name[de->td_namelen] = '\0';
827 d.d_reclen = _DIRENT_SIZE(&d);
828
829 /* Stop reading if the directory entry we are treating is
830 * bigger than the amount of data that can be returned. */
831 if (d.d_reclen > uio->uio_resid) {
832 error = -1;
833 break;
834 }
835
836 /* Copy the new dirent structure into the output buffer and
837 * advance pointers. */
838 error = uiomove(&d, d.d_reclen, uio);
839
840 (*cntp)++;
841 de = TAILQ_NEXT(de, td_entries);
842 } while (error == 0 && uio->uio_resid > 0 && de != NULL);
843
844 /* Update the offset and cache. */
845 if (de == NULL) {
846 uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
847 node->tn_spec.tn_dir.tn_readdir_lastn = 0;
848 node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
849 } else {
850 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
851 tmpfs_dircookie(de);
852 node->tn_spec.tn_dir.tn_readdir_lastp = de;
853 }
854
855 node->tn_status |= TMPFS_NODE_ACCESSED;
856
857 return error;
858 }
859
860 /* --------------------------------------------------------------------- */
861
862 /*
863 * Resizes the aobj associated to the regular file pointed to by vp to
864 * the size newsize. 'vp' must point to a vnode that represents a regular
865 * file. 'newsize' must be positive.
866 *
867 * If the file is extended, the appropriate kevent is raised. This does
868 * not rise a write event though because resizing is not the same as
869 * writing.
870 *
871 * Returns zero on success or an appropriate error code on failure.
872 */
873 int
874 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
875 {
876 int error;
877 size_t newpages, oldpages;
878 struct tmpfs_mount *tmp;
879 struct tmpfs_node *node;
880 off_t oldsize;
881
882 KASSERT(vp->v_type == VREG);
883 KASSERT(newsize >= 0);
884
885 node = VP_TO_TMPFS_NODE(vp);
886 tmp = VFS_TO_TMPFS(vp->v_mount);
887
888 /* Convert the old and new sizes to the number of pages needed to
889 * store them. It may happen that we do not need to do anything
890 * because the last allocated page can accommodate the change on
891 * its own. */
892 oldsize = node->tn_size;
893 oldpages = round_page(oldsize) / PAGE_SIZE;
894 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);
895 newpages = round_page(newsize) / PAGE_SIZE;
896
897 mutex_enter(&tmp->tm_lock);
898 if (newpages > oldpages &&
899 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
900 error = ENOSPC;
901 mutex_exit(&tmp->tm_lock);
902 goto out;
903 }
904 tmp->tm_pages_used += (newpages - oldpages);
905 mutex_exit(&tmp->tm_lock);
906
907 if (newsize < oldsize) {
908 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;
909
910 /*
911 * free "backing store"
912 */
913
914 if (newpages < oldpages) {
915 struct uvm_object *uobj;
916
917 uobj = node->tn_spec.tn_reg.tn_aobj;
918
919 mutex_enter(&uobj->vmobjlock);
920 uao_dropswap_range(uobj, newpages, oldpages);
921 mutex_exit(&uobj->vmobjlock);
922 }
923
924 /*
925 * zero out the truncated part of the last page.
926 */
927
928 uvm_vnp_zerorange(vp, newsize, zerolen);
929 }
930
931 node->tn_spec.tn_reg.tn_aobj_pages = newpages;
932 node->tn_size = newsize;
933 uvm_vnp_setsize(vp, newsize);
934
935 error = 0;
936
937 if (newsize > oldsize)
938 VN_KNOTE(vp, NOTE_EXTEND);
939
940 out:
941 return error;
942 }
943
944 /* --------------------------------------------------------------------- */
945
946 /*
947 * Returns information about the number of available memory pages,
948 * including physical and virtual ones.
949 *
950 * If 'total' is true, the value returned is the total amount of memory
951 * pages configured for the system (either in use or free).
952 * If it is FALSE, the value returned is the amount of free memory pages.
953 *
954 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
955 * excessive memory usage.
956 *
957 */
958 size_t
959 tmpfs_mem_info(bool total)
960 {
961 size_t size;
962
963 size = 0;
964 size += uvmexp.swpgavail;
965 if (!total) {
966 size -= uvmexp.swpgonly;
967 }
968 size += uvmexp.free;
969 size += uvmexp.filepages;
970 if (size > uvmexp.wired) {
971 size -= uvmexp.wired;
972 } else {
973 size = 0;
974 }
975
976 return size;
977 }
978
979 /* --------------------------------------------------------------------- */
980
981 /*
982 * Change flags of the given vnode.
983 * Caller should execute tmpfs_update on vp after a successful execution.
984 * The vnode must be locked on entry and remain locked on exit.
985 */
986 int
987 tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l)
988 {
989 int error;
990 struct tmpfs_node *node;
991
992 KASSERT(VOP_ISLOCKED(vp));
993
994 node = VP_TO_TMPFS_NODE(vp);
995
996 /* Disallow this operation if the file system is mounted read-only. */
997 if (vp->v_mount->mnt_flag & MNT_RDONLY)
998 return EROFS;
999
1000 /* XXX: The following comes from UFS code, and can be found in
1001 * several other file systems. Shouldn't this be centralized
1002 * somewhere? */
1003 if (kauth_cred_geteuid(cred) != node->tn_uid &&
1004 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
1005 NULL)))
1006 return error;
1007 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
1008 /* The super-user is only allowed to change flags if the file
1009 * wasn't protected before and the securelevel is zero. */
1010 if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
1011 kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHSYSFLAGS,
1012 0, NULL, NULL, NULL))
1013 return EPERM;
1014 node->tn_flags = flags;
1015 } else {
1016 /* Regular users can change flags provided they only want to
1017 * change user-specific ones, not those reserved for the
1018 * super-user. */
1019 if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
1020 (flags & UF_SETTABLE) != flags)
1021 return EPERM;
1022 if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
1023 return EPERM;
1024 node->tn_flags &= SF_SETTABLE;
1025 node->tn_flags |= (flags & UF_SETTABLE);
1026 }
1027
1028 node->tn_status |= TMPFS_NODE_CHANGED;
1029 VN_KNOTE(vp, NOTE_ATTRIB);
1030
1031 KASSERT(VOP_ISLOCKED(vp));
1032
1033 return 0;
1034 }
1035
1036 /* --------------------------------------------------------------------- */
1037
1038 /*
1039 * Change access mode on the given vnode.
1040 * Caller should execute tmpfs_update on vp after a successful execution.
1041 * The vnode must be locked on entry and remain locked on exit.
1042 */
1043 int
1044 tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l)
1045 {
1046 int error, ismember = 0;
1047 struct tmpfs_node *node;
1048
1049 KASSERT(VOP_ISLOCKED(vp));
1050
1051 node = VP_TO_TMPFS_NODE(vp);
1052
1053 /* Disallow this operation if the file system is mounted read-only. */
1054 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1055 return EROFS;
1056
1057 /* Immutable or append-only files cannot be modified, either. */
1058 if (node->tn_flags & (IMMUTABLE | APPEND))
1059 return EPERM;
1060
1061 /* XXX: The following comes from UFS code, and can be found in
1062 * several other file systems. Shouldn't this be centralized
1063 * somewhere? */
1064 if (kauth_cred_geteuid(cred) != node->tn_uid &&
1065 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
1066 NULL)))
1067 return error;
1068 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) {
1069 if (vp->v_type != VDIR && (mode & S_ISTXT))
1070 return EFTYPE;
1071
1072 if ((kauth_cred_ismember_gid(cred, node->tn_gid,
1073 &ismember) != 0 || !ismember) && (mode & S_ISGID))
1074 return EPERM;
1075 }
1076
1077 node->tn_mode = (mode & ALLPERMS);
1078
1079 node->tn_status |= TMPFS_NODE_CHANGED;
1080 VN_KNOTE(vp, NOTE_ATTRIB);
1081
1082 KASSERT(VOP_ISLOCKED(vp));
1083
1084 return 0;
1085 }
1086
1087 /* --------------------------------------------------------------------- */
1088
1089 /*
1090 * Change ownership of the given vnode. At least one of uid or gid must
1091 * be different than VNOVAL. If one is set to that value, the attribute
1092 * is unchanged.
1093 * Caller should execute tmpfs_update on vp after a successful execution.
1094 * The vnode must be locked on entry and remain locked on exit.
1095 */
1096 int
1097 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
1098 struct lwp *l)
1099 {
1100 int error, ismember = 0;
1101 struct tmpfs_node *node;
1102
1103 KASSERT(VOP_ISLOCKED(vp));
1104
1105 node = VP_TO_TMPFS_NODE(vp);
1106
1107 /* Assign default values if they are unknown. */
1108 KASSERT(uid != VNOVAL || gid != VNOVAL);
1109 if (uid == VNOVAL)
1110 uid = node->tn_uid;
1111 if (gid == VNOVAL)
1112 gid = node->tn_gid;
1113 KASSERT(uid != VNOVAL && gid != VNOVAL);
1114
1115 /* Disallow this operation if the file system is mounted read-only. */
1116 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1117 return EROFS;
1118
1119 /* Immutable or append-only files cannot be modified, either. */
1120 if (node->tn_flags & (IMMUTABLE | APPEND))
1121 return EPERM;
1122
1123 /* XXX: The following comes from UFS code, and can be found in
1124 * several other file systems. Shouldn't this be centralized
1125 * somewhere? */
1126 if ((kauth_cred_geteuid(cred) != node->tn_uid || uid != node->tn_uid ||
1127 (gid != node->tn_gid && !(kauth_cred_getegid(cred) == node->tn_gid ||
1128 (kauth_cred_ismember_gid(cred, gid, &ismember) == 0 && ismember)))) &&
1129 ((error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
1130 NULL)) != 0))
1131 return error;
1132
1133 node->tn_uid = uid;
1134 node->tn_gid = gid;
1135
1136 node->tn_status |= TMPFS_NODE_CHANGED;
1137 VN_KNOTE(vp, NOTE_ATTRIB);
1138
1139 KASSERT(VOP_ISLOCKED(vp));
1140
1141 return 0;
1142 }
1143
1144 /* --------------------------------------------------------------------- */
1145
1146 /*
1147 * Change size of the given vnode.
1148 * Caller should execute tmpfs_update on vp after a successful execution.
1149 * The vnode must be locked on entry and remain locked on exit.
1150 */
1151 int
1152 tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred,
1153 struct lwp *l)
1154 {
1155 int error;
1156 struct tmpfs_node *node;
1157
1158 KASSERT(VOP_ISLOCKED(vp));
1159
1160 node = VP_TO_TMPFS_NODE(vp);
1161
1162 /* Decide whether this is a valid operation based on the file type. */
1163 error = 0;
1164 switch (vp->v_type) {
1165 case VDIR:
1166 return EISDIR;
1167
1168 case VREG:
1169 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1170 return EROFS;
1171 break;
1172
1173 case VBLK:
1174 /* FALLTHROUGH */
1175 case VCHR:
1176 /* FALLTHROUGH */
1177 case VFIFO:
1178 /* Allow modifications of special files even if in the file
1179 * system is mounted read-only (we are not modifying the
1180 * files themselves, but the objects they represent). */
1181 return 0;
1182
1183 default:
1184 /* Anything else is unsupported. */
1185 return EOPNOTSUPP;
1186 }
1187
1188 /* Immutable or append-only files cannot be modified, either. */
1189 if (node->tn_flags & (IMMUTABLE | APPEND))
1190 return EPERM;
1191
1192 error = tmpfs_truncate(vp, size);
1193 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
1194 * for us, as will update tn_status; no need to do that here. */
1195
1196 KASSERT(VOP_ISLOCKED(vp));
1197
1198 return error;
1199 }
1200
1201 /* --------------------------------------------------------------------- */
1202
1203 /*
1204 * Change access and modification times of the given vnode.
1205 * Caller should execute tmpfs_update on vp after a successful execution.
1206 * The vnode must be locked on entry and remain locked on exit.
1207 */
1208 int
1209 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
1210 int vaflags, kauth_cred_t cred, struct lwp *l)
1211 {
1212 int error;
1213 struct tmpfs_node *node;
1214
1215 KASSERT(VOP_ISLOCKED(vp));
1216
1217 node = VP_TO_TMPFS_NODE(vp);
1218
1219 /* Disallow this operation if the file system is mounted read-only. */
1220 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1221 return EROFS;
1222
1223 /* Immutable or append-only files cannot be modified, either. */
1224 if (node->tn_flags & (IMMUTABLE | APPEND))
1225 return EPERM;
1226
1227 /* XXX: The following comes from UFS code, and can be found in
1228 * several other file systems. Shouldn't this be centralized
1229 * somewhere? */
1230 if (kauth_cred_geteuid(cred) != node->tn_uid &&
1231 (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
1232 NULL)) && ((vaflags & VA_UTIMES_NULL) == 0 ||
1233 (error = VOP_ACCESS(vp, VWRITE, cred, l))))
1234 return error;
1235
1236 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
1237 node->tn_status |= TMPFS_NODE_ACCESSED;
1238
1239 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
1240 node->tn_status |= TMPFS_NODE_MODIFIED;
1241
1242 tmpfs_update(vp, atime, mtime, 0);
1243 VN_KNOTE(vp, NOTE_ATTRIB);
1244
1245 KASSERT(VOP_ISLOCKED(vp));
1246
1247 return 0;
1248 }
1249
1250 /* --------------------------------------------------------------------- */
1251
1252 /* Sync timestamps */
1253 void
1254 tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
1255 const struct timespec *mod)
1256 {
1257 struct timespec now;
1258 struct tmpfs_node *node;
1259
1260 node = VP_TO_TMPFS_NODE(vp);
1261
1262 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
1263 TMPFS_NODE_CHANGED)) == 0)
1264 return;
1265
1266 getnanotime(&now);
1267 if (node->tn_status & TMPFS_NODE_ACCESSED) {
1268 if (acc == NULL)
1269 acc = &now;
1270 node->tn_atime = *acc;
1271 }
1272 if (node->tn_status & TMPFS_NODE_MODIFIED) {
1273 if (mod == NULL)
1274 mod = &now;
1275 node->tn_mtime = *mod;
1276 }
1277 if (node->tn_status & TMPFS_NODE_CHANGED)
1278 node->tn_ctime = now;
1279
1280 node->tn_status &=
1281 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
1282 }
1283
1284 /* --------------------------------------------------------------------- */
1285
/*
 * tmpfs_update: flush pending timestamp updates for the node behind 'vp'
 * by delegating to tmpfs_itimes with the given 'acc' / 'mod' values.
 *
 * => 'vp' must be locked by the caller and is still locked on return.
 * => 'flags' is currently ignored.
 */
void
tmpfs_update(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, int flags)
{

	KASSERT(VOP_ISLOCKED(vp));

	/* The node itself is not needed here; the lookup is kept only so
	 * that whatever VP_TO_TMPFS_NODE evaluates still runs.
	 * NOTE(review): confirm whether it can simply be dropped. */
	(void)VP_TO_TMPFS_NODE(vp);

	/* XXX: UPDATE_CLOSE in 'flags' is not acted upon; does it need
	 * any special treatment? */

	tmpfs_itimes(vp, acc, mod);

	KASSERT(VOP_ISLOCKED(vp));
}
1306
1307 /* --------------------------------------------------------------------- */
1308
1309 int
1310 tmpfs_truncate(struct vnode *vp, off_t length)
1311 {
1312 bool extended;
1313 int error;
1314 struct tmpfs_node *node;
1315
1316 node = VP_TO_TMPFS_NODE(vp);
1317 extended = length > node->tn_size;
1318
1319 if (length < 0) {
1320 error = EINVAL;
1321 goto out;
1322 }
1323
1324 if (node->tn_size == length) {
1325 error = 0;
1326 goto out;
1327 }
1328
1329 error = tmpfs_reg_resize(vp, length);
1330 if (error == 0)
1331 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1332
1333 out:
1334 tmpfs_update(vp, NULL, NULL, 0);
1335
1336 return error;
1337 }
1338