/*	$NetBSD: tmpfs_subr.c,v 1.82 2013/11/01 15:38:45 rmind Exp $	*/

/*
 * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system: interfaces for inode and directory entry
 * construction, destruction and manipulation.
 *
 * Reference counting
 *
 *	The link count of an inode (tmpfs_node_t::tn_links) is used as a
 *	reference counter.  However, it has slightly different semantics.
 *
 *	For directories, the link count represents the directory entries
 *	which refer to the directory, i.e. the count of sub-directories.
 *	It also accounts for the virtual '.' entry (which has no real entry
 *	in the list).  For files, the link count represents the hard links.
 *	Since only empty directories can be removed, the link count is
 *	sufficient for the reference counting needs.  Note: the inode size
 *	(tmpfs_node_t::tn_size) can be used to check whether a directory is
 *	empty.
 *
 *	The inode itself, as an object, gathers its first reference when a
 *	directory entry is attached via tmpfs_dir_attach(9).  For instance,
 *	after a regular tmpfs_create(), a file would have a link count of 1,
 *	while a directory after tmpfs_mkdir() would have 2 (due to '.').
 *
 * Reclamation
 *
 *	tmpfs inodes rely on a combination of vnode reference counting and
 *	link counting: an inode can only be destroyed if its associated
 *	vnode is inactive.  The destruction is done on vnode reclamation,
 *	i.e. in tmpfs_reclaim().  Note that tmpfs_node_t::tn_links being 0
 *	is a destruction criterion.
 *
 *	If an inode has references within the file system (tn_links > 0) and
 *	its inactive vnode gets reclaimed/recycled, then the association is
 *	broken in tmpfs_reclaim().  In such a case, the inode will always pass
 *	tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode.
 *
 * Lock order
 *
 *	tmpfs_node_t::tn_vlock ->
 *		vnode_t::v_vlock ->
 *			vnode_t::v_interlock
 */
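
/*
 * Illustrative sketch (not part of the interface): the expected life cycle
 * of a tmpfs inode in terms of the helpers defined in this file.  Error
 * handling and locking are omitted; the names and values only restate the
 * rules described above.
 *
 *	tmpfs_alloc_node(tmp, VREG, ...)	-> tn_links == 0
 *	tmpfs_alloc_dirent(tmp, "f", 1, &de)
 *	tmpfs_vnode_get(mp, node, &vp)		-> vnode associated and locked
 *	tmpfs_dir_attach(dvp, de, node)		-> tn_links == 1
 *	...
 *	tmpfs_dir_detach(dvp, de)		-> tn_links == 0
 *	tmpfs_free_dirent(tmp, de)
 *	(vnode becomes inactive and is reclaimed; with tn_links == 0 the
 *	inode is expected to be destroyed via tmpfs_free_node() from
 *	tmpfs_reclaim())
 *
 * For a directory created with tmpfs_mkdir(), tn_links would instead be 2
 * after the attach ('.' plus the entry in the parent), and the parent's own
 * tn_links is incremented by one.
 */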

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.82 2013/11/01 15:38:45 rmind Exp $");

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

/*
 * tmpfs_alloc_node: allocate a new inode of a specified type and
 * insert it into the list of the specified mount point.
 */
int
tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
    mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
{
	tmpfs_node_t *nnode;

	nnode = tmpfs_node_get(tmp);
	if (nnode == NULL) {
		return ENOSPC;
	}

	/* Initially, no references and no associations. */
	nnode->tn_links = 0;
	nnode->tn_vnode = NULL;
	nnode->tn_dirent_hint = NULL;

	/*
	 * XXX Where the pool is backed by a map larger than (4GB *
	 * sizeof(*nnode)), this may produce duplicate inode numbers
	 * for applications that do not understand 64-bit ino_t.
	 */
	nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
	nnode->tn_gen = TMPFS_NODE_GEN_MASK & random();

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_lockf = NULL;

	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_atime;
	nnode->tn_ctime = nnode->tn_atime;
	nnode->tn_mtime = nnode->tn_atime;

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		/* Character/block special device. */
		KASSERT(rdev != VNOVAL);
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;
	case VDIR:
		/* Directory. */
		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		nnode->tn_spec.tn_dir.tn_parent = NULL;
		nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;

		/* Extra link count for the virtual '.' entry. */
		nnode->tn_links++;
		break;
	case VFIFO:
	case VSOCK:
		break;
	case VLNK:
		/* Symbolic link.  Target specifies the file name. */
		KASSERT(target != NULL);
		nnode->tn_size = strlen(target);

		if (nnode->tn_size == 0) {
			/* Zero-length targets are supported. */
			nnode->tn_spec.tn_lnk.tn_link = NULL;
			break;
		}

		KASSERT(nnode->tn_size < MAXPATHLEN);
		nnode->tn_size++; /* include the NUL terminator */

		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_strname_alloc(tmp, nnode->tn_size);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			tmpfs_node_put(tmp, nnode);
			return ENOSPC;
		}
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;
	case VREG:
		/* Regular file.  Create an underlying UVM object. */
		nnode->tn_spec.tn_reg.tn_aobj =
		    uao_create(INT32_MAX - PAGE_SIZE, 0);
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
		break;
	default:
		KASSERT(false);
	}

	mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);

	mutex_enter(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
	mutex_exit(&tmp->tm_lock);

	*node = nnode;
	return 0;
}

/*
 * tmpfs_free_node: remove the inode from the list in the mount point and
 * destroy the inode structures.
 */
void
tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
{
	size_t objsz;

	mutex_enter(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	mutex_exit(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		if (node->tn_size > 0) {
			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
			    node->tn_size);
		}
		break;
	case VREG:
		/*
		 * Calculate the size of inode data, decrease the used-memory
		 * counter, and destroy the underlying UVM object (if any).
		 */
		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
		if (objsz != 0) {
			tmpfs_mem_decr(tmp, objsz);
		}
		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		}
		break;
	case VDIR:
		/*
		 * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
		 * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
		 *     node == tmp->tm_root);
		 */
		break;
	default:
		break;
	}

	mutex_destroy(&node->tn_vlock);
	tmpfs_node_put(tmp, node);
}

/*
 * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
 *
 * => Must be called with tmpfs_node_t::tn_vlock held.
 * => Returns vnode (*vpp) locked.
 */
int
tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
{
	vnode_t *vp;
	kmutex_t *slock;
	int error;
again:
	/* If there is already a vnode, try to reclaim it. */
	if ((vp = node->tn_vnode) != NULL) {
		atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT);
		mutex_enter(vp->v_interlock);
		mutex_exit(&node->tn_vlock);
		error = vget(vp, LK_EXCLUSIVE);
		if (error == ENOENT) {
			mutex_enter(&node->tn_vlock);
			goto again;
		}
		atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
		*vpp = vp;
		return error;
	}
	if (TMPFS_NODE_RECLAIMING(node)) {
		atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
	}

	/*
	 * Get a new vnode and associate it with our inode.  Share the
	 * lock with the underlying UVM object, if there is one (VREG case).
	 */
	if (node->tn_type == VREG) {
		struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
		slock = uobj->vmobjlock;
	} else {
		slock = NULL;
	}
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, slock, &vp);
	if (error) {
		mutex_exit(&node->tn_vlock);
		return error;
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
		break;
	case VDIR:
		vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VV_ROOT : 0;
		break;
	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;
	case VLNK:
	case VREG:
	case VSOCK:
		break;
	default:
		KASSERT(false);
	}

	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	mutex_exit(&node->tn_vlock);

	KASSERT(VOP_ISLOCKED(vp));
	*vpp = vp;
	return 0;
}
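
/*
 * Usage sketch for tmpfs_vnode_get(), for illustration only: the caller
 * must hold tn_vlock, which is released on every return path above.  For
 * example, tmpfs_alloc_file() below does:
 *
 *	mutex_enter(&node->tn_vlock);
 *	error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
 *	if (error == 0)
 *		KASSERT(VOP_ISLOCKED(*vpp));
 *
 * i.e. on success the new or reclaimed vnode is returned locked and the
 * inode lock is no longer held.
 */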

/*
 * tmpfs_alloc_file: allocate a new file of the specified type and add it
 * into the parent directory.
 *
 * => Credentials of the caller are used.
 */
int
tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
	tmpfs_dirent_t *de, *wde;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	*vpp = NULL;

	/* Check for the maximum number of links limit. */
	if (vap->va_type == VDIR) {
		/* Check for maximum links limit. */
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}
		KASSERT(dnode->tn_links < LINK_MAX);
	}

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type,
	    kauth_cred_geteuid(cnp->cn_cred), dnode->tn_gid, vap->va_mode,
	    target, vap->va_rdev, &node);
	if (error)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Get a vnode for the new file. */
	mutex_enter(&node->tn_vlock);
	error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
	if (error) {
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Remove the whiteout before adding the new entry. */
	if (cnp->cn_flags & ISWHITEOUT) {
		wde = tmpfs_dir_lookup(dnode, cnp);
		KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT);
		tmpfs_dir_detach(dvp, wde);
		tmpfs_free_dirent(tmp, wde);
	}

	/* Associate the inode and attach the entry into the directory. */
	tmpfs_dir_attach(dvp, de, node);

	/* Make the node opaque if requested. */
	if (cnp->cn_flags & ISWHITEOUT)
		node->tn_flags |= UF_OPAQUE;
out:
	vput(dvp);
	return error;
}

/*
 * tmpfs_alloc_dirent: allocate a new directory entry for the inode.
 * The directory entry contains a path name component.
 */
int
tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
    tmpfs_dirent_t **de)
{
	tmpfs_dirent_t *nde;

	nde = tmpfs_dirent_get(tmp);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_strname_alloc(tmp, len);
	if (nde->td_name == NULL) {
		tmpfs_dirent_put(tmp, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);

	*de = nde;
	return 0;
}

/*
 * tmpfs_free_dirent: free a directory entry.
 */
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
{

	/* KASSERT(de->td_node == NULL); */
	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
	tmpfs_dirent_put(tmp, de);
}

/*
 * tmpfs_dir_attach: associate a directory entry with the specified inode,
 * and attach the entry into the directory, specified by vnode.
 *
 * => Increases the link count on the associated node.
 * => Increases the link count on the directory node, if our node is VDIR.
 *    It is the caller's responsibility to check for the LINK_MAX limit.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
	int events = NOTE_WRITE;

	KASSERT(VOP_ISLOCKED(dvp));

	/* Associate the directory entry and the inode. */
	de->td_node = node;
	if (node != TMPFS_NODE_WHITEOUT) {
		KASSERT(node->tn_links < LINK_MAX);
		node->tn_links++;

		/* Save the hint (might overwrite). */
		node->tn_dirent_hint = de;
	}

	/* Insert the entry into the directory (parent of the inode). */
	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size += sizeof(tmpfs_dirent_t);
	dnode->tn_status |= TMPFS_NODE_STATUSALL;
	uvm_vnp_setsize(dvp, dnode->tn_size);

	if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) {
		/* Set the parent. */
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
		node->tn_spec.tn_dir.tn_parent = dnode;

		/* Increase the link count of the parent. */
		KASSERT(dnode->tn_links < LINK_MAX);
		dnode->tn_links++;
		events |= NOTE_LINK;

		TMPFS_VALIDATE_DIR(node);
	}
	VN_KNOTE(dvp, events);
}
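
/*
 * Worked example (illustrative only) of the link count bookkeeping done by
 * tmpfs_dir_attach(): creating a sub-directory in a directory whose tn_links
 * is currently N.
 *
 *	tmpfs_alloc_node(tmp, VDIR, ...)	-> child tn_links == 1 ('.')
 *	tmpfs_dir_attach(dvp, de, child)	-> child tn_links == 2,
 *						   parent tn_links == N + 1
 *
 * A regular file only gets the first increment (tn_links == 1 after the
 * attach) and does not touch the parent's link count.  Checking against
 * LINK_MAX is the caller's job, as done in tmpfs_alloc_file() above.
 */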

/*
 * tmpfs_dir_detach: disassociate the directory entry and its inode,
 * and detach the entry from the directory, specified by vnode.
 *
 * => Decreases the link count on the associated node.
 * => Decreases the link count on the directory node, if our node is VDIR.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de)
{
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
	tmpfs_node_t *node = de->td_node;
	int events = NOTE_WRITE;

	KASSERT(VOP_ISLOCKED(dvp));

	if (node != TMPFS_NODE_WHITEOUT) {
		vnode_t *vp = node->tn_vnode;

		KASSERT(VOP_ISLOCKED(vp));

		/* Deassociate the inode and the entry. */
		de->td_node = NULL;
		node->tn_dirent_hint = NULL;

		KASSERT(node->tn_links > 0);
		node->tn_links--;
		VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE);

		/* If a directory, decrease the link count of the parent. */
		if (node->tn_type == VDIR) {
			KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
			node->tn_spec.tn_dir.tn_parent = NULL;

			KASSERT(dnode->tn_links > 0);
			dnode->tn_links--;
			events |= NOTE_LINK;
		}
	}

	/* Remove the entry from the directory. */
	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}
	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);

	dnode->tn_size -= sizeof(tmpfs_dirent_t);
	dnode->tn_status |= TMPFS_NODE_STATUSALL;
	uvm_vnp_setsize(dvp, dnode->tn_size);
	VN_KNOTE(dvp, events);
}

/*
 * tmpfs_dir_lookup: find a directory entry in the specified inode.
 *
 * Note that the . and .. components are not allowed as they do not
 * physically exist within directories.
 */
tmpfs_dirent_t *
tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
{
	const char *name = cnp->cn_nameptr;
	const uint16_t nlen = cnp->cn_namelen;
	tmpfs_dirent_t *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	KASSERT(nlen != 1 || !(name[0] == '.'));
	KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
	TMPFS_VALIDATE_DIR(node);

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (de->td_namelen != nlen)
			continue;
		if (memcmp(de->td_name, name, nlen) != 0)
			continue;
		break;
	}
	node->tn_status |= TMPFS_NODE_ACCESSED;
	return de;
}

/*
 * tmpfs_dir_cached: get a cached directory entry if it is valid.  Used to
 * avoid an unnecessary tmpfs_dir_lookup().
 *
 * => The vnode must be locked.
 */
tmpfs_dirent_t *
tmpfs_dir_cached(tmpfs_node_t *node)
{
	tmpfs_dirent_t *de = node->tn_dirent_hint;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (de == NULL) {
		return NULL;
	}
	KASSERT(de->td_node == node);

	/*
	 * Directories always have a valid hint.  For files, check if there
	 * are any hard links; if there are, the hint might be invalid.
	 */
	return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de;
}

/*
 * tmpfs_dir_getdotdent: helper function for tmpfs_readdir.  Creates a
 * '.' entry for the given directory and returns it in the uio space.
 */
int
tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio)
{
	struct dirent *dentp;
	int error;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
	dentp->d_fileno = node->tn_id;
	dentp->d_type = DT_DIR;
	dentp->d_namlen = 1;
	dentp->d_name[0] = '.';
	dentp->d_name[1] = '\0';
	dentp->d_reclen = _DIRENT_SIZE(dentp);

	if (dentp->d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(dentp, dentp->d_reclen, uio);
		if (error == 0)
			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
	}
	node->tn_status |= TMPFS_NODE_ACCESSED;
	kmem_free(dentp, sizeof(struct dirent));
	return error;
}

/*
 * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir.  Creates a
 * '..' entry for the given directory and returns it in the uio space.
 */
int
tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio)
{
	struct dirent *dentp;
	int error;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
	dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
	dentp->d_type = DT_DIR;
	dentp->d_namlen = 2;
	dentp->d_name[0] = '.';
	dentp->d_name[1] = '.';
	dentp->d_name[2] = '\0';
	dentp->d_reclen = _DIRENT_SIZE(dentp);

	if (dentp->d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(dentp, dentp->d_reclen, uio);
		if (error == 0) {
			tmpfs_dirent_t *de;

			de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			if (de == NULL)
				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
			else
				uio->uio_offset = tmpfs_dircookie(de);
		}
	}
	node->tn_status |= TMPFS_NODE_ACCESSED;
	kmem_free(dentp, sizeof(struct dirent));
	return error;
}

/*
 * tmpfs_dir_lookupbycookie: lookup a directory entry by the associated cookie.
 */
tmpfs_dirent_t *
tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie)
{
	tmpfs_dirent_t *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
	    node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
		return node->tn_spec.tn_dir.tn_readdir_lastp;
	}
	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (tmpfs_dircookie(de) == cookie) {
			break;
		}
	}
	return de;
}

/*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
 *
 * => Returns as many directory entries as can fit in the uio space.
 * => The read starts at uio->uio_offset.
 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp)
{
	tmpfs_dirent_t *de;
	struct dirent *dentp;
	off_t startcookie;
	int error;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);

	/*
	 * Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry.
	 */
	startcookie = uio->uio_offset;
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
		return 0;
	} else {
		de = tmpfs_dir_lookupbycookie(node, startcookie);
	}
	if (de == NULL) {
		return EINVAL;
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust the uio space.
	 */
	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
	do {
		/*
		 * Create a dirent structure representing the current
		 * inode and fill it.
		 */
		if (de->td_node == TMPFS_NODE_WHITEOUT) {
			dentp->d_fileno = 1;
			dentp->d_type = DT_WHT;
		} else {
			dentp->d_fileno = de->td_node->tn_id;
			switch (de->td_node->tn_type) {
			case VBLK:
				dentp->d_type = DT_BLK;
				break;
			case VCHR:
				dentp->d_type = DT_CHR;
				break;
			case VDIR:
				dentp->d_type = DT_DIR;
				break;
			case VFIFO:
				dentp->d_type = DT_FIFO;
				break;
			case VLNK:
				dentp->d_type = DT_LNK;
				break;
			case VREG:
				dentp->d_type = DT_REG;
				break;
			case VSOCK:
				dentp->d_type = DT_SOCK;
				break;
			default:
				KASSERT(false);
			}
		}
		dentp->d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dentp->d_name));
		memcpy(dentp->d_name, de->td_name, de->td_namelen);
		dentp->d_name[de->td_namelen] = '\0';
		dentp->d_reclen = _DIRENT_SIZE(dentp);

		/*
		 * Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned.
		 */
		if (dentp->d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/*
		 * Copy the new dirent structure into the output buffer and
		 * advance pointers.
		 */
		error = uiomove(dentp, dentp->d_reclen, uio);

		(*cntp)++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset and cache. */
	if (de == NULL) {
		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
		node->tn_spec.tn_dir.tn_readdir_lastn = 0;
		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	} else {
		node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
		    tmpfs_dircookie(de);
		node->tn_spec.tn_dir.tn_readdir_lastp = de;
	}
	node->tn_status |= TMPFS_NODE_ACCESSED;
	kmem_free(dentp, sizeof(struct dirent));
	return error;
}
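
/*
 * Illustrative summary of the readdir cookie protocol implemented by the
 * helpers above (not an additional interface).  A full walk of a directory
 * is expected to see uio_offset progress as follows:
 *
 *	TMPFS_DIRCOOKIE_DOT		'.'  (tmpfs_dir_getdotdent)
 *	TMPFS_DIRCOOKIE_DOTDOT		'..' (tmpfs_dir_getdotdotdent)
 *	tmpfs_dircookie(de), per entry	(tmpfs_dir_getdents)
 *	TMPFS_DIRCOOKIE_EOF		end of directory
 *
 * tmpfs_dir_getdents() itself never sees the DOT/DOTDOT cookies (see the
 * KASSERTs above) and caches the last returned entry in tn_readdir_lastn
 * and tn_readdir_lastp, so a sequential reader usually avoids the linear
 * scan in tmpfs_dir_lookupbycookie().
 */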

/*
 * tmpfs_reg_resize: resize the underlying UVM object associated with the
 * specified regular file.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
	size_t newpages, oldpages;
	off_t oldsize;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	oldsize = node->tn_size;
	oldpages = round_page(oldsize) >> PAGE_SHIFT;
	newpages = round_page(newsize) >> PAGE_SHIFT;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);

	if (newpages > oldpages) {
		/* Increase the used-memory counter if getting extra pages. */
		if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) {
			return ENOSPC;
		}
	} else if (newsize < oldsize) {
		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;

		ubc_zerorange(uobj, newsize, zerolen, UBC_UNMAP_FLAG(vp));
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);

	/*
	 * Free the "backing store".
	 */
	if (newpages < oldpages) {
		KASSERT(uobj->vmobjlock == vp->v_interlock);

		mutex_enter(uobj->vmobjlock);
		uao_dropswap_range(uobj, newpages, oldpages);
		mutex_exit(uobj->vmobjlock);

		/* Decrease the used-memory counter. */
		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
	}
	if (newsize > oldsize) {
		VN_KNOTE(vp, NOTE_EXTEND);
	}
	return 0;
}
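
/*
 * Worked example (illustrative only, assuming 4 KB pages) of the accounting
 * in tmpfs_reg_resize(): growing a file from 100 bytes (1 page) to 9000
 * bytes (3 pages) charges (3 - 1) << PAGE_SHIFT = 8192 bytes through
 * tmpfs_mem_incr(); truncating it back to 100 bytes drops the swap slots of
 * pages [1, 3) with uao_dropswap_range() and returns the same 8192 bytes
 * through tmpfs_mem_decr().  The accounting is page-granular, while tn_size
 * keeps the byte-exact length.
 */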

/*
 * tmpfs_chflags: change flags of the given vnode.
 *
 * => Caller should perform tmpfs_update().
 */
int
tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
	int error;
	bool changing_sysflags = false;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/*
	 * If the new flags have non-user flags that are different than
	 * those on the node, we need special permission to change them.
	 */
	if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
		action |= KAUTH_VNODE_WRITE_SYSFLAGS;
		changing_sysflags = true;
	}

	/*
	 * Indicate that this node's flags have system attributes in them if
	 * that is the case.
	 */
	if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
		action |= KAUTH_VNODE_HAS_SYSFLAGS;
	}

	error = kauth_authorize_vnode(cred, action, vp, NULL,
	    genfs_can_chflags(cred, vp->v_type, node->tn_uid,
	    changing_sysflags));
	if (error)
		return error;

	/*
	 * Set the flags.  If we are not setting non-user flags, be careful
	 * not to overwrite them.
	 *
	 * XXX: Can't we always assign here?  If the system flags are
	 *	different, the code above should catch attempts to change
	 *	them without proper permissions, and if we're here it means
	 *	it's okay to change them...
	 */
	if (!changing_sysflags) {
		/* Clear all user-settable flags and re-set them. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	} else {
		node->tn_flags = flags;
	}
	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chmod: change access mode on the given vnode.
 *
 * => Caller should perform tmpfs_update().
 */
int
tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
	    NULL, genfs_can_chmod(vp->v_type, cred, node->tn_uid,
	    node->tn_gid, mode));
	if (error) {
		return error;
	}
	node->tn_mode = (mode & ALLPERMS);
	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chown: change ownership of the given vnode.
 *
 * => At least one of uid or gid must be different than VNOVAL.
 * => The attribute is unchanged for the VNOVAL case.
 * => Caller should perform tmpfs_update().
 */
int
tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL) {
		uid = node->tn_uid;
	}
	if (gid == VNOVAL) {
		gid = node->tn_gid;
	}

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
	    NULL, genfs_can_chown(cred, node->tn_uid, node->tn_gid, uid,
	    gid));
	if (error) {
		return error;
	}
	node->tn_uid = uid;
	node->tn_gid = gid;
	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chsize: change size of the given vnode.
 */
int
tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);

	KASSERT(VOP_ISLOCKED(vp));

	/* Decide whether this is a valid operation based on the file type. */
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;
	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
			return EROFS;
		}
		break;
	case VBLK:
	case VCHR:
	case VFIFO:
		/*
		 * Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return 0;
	default:
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		return EPERM;
	}

	/* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */
	return tmpfs_truncate(vp, size);
}

/*
 * tmpfs_chtimes: change access and modification times for the vnode.
 */
int
tmpfs_chtimes(vnode_t *vp, const struct timespec *atime,
    const struct timespec *mtime, const struct timespec *btime,
    int vaflags, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
	    genfs_can_chtimes(vp, vaflags, node->tn_uid, cred));
	if (error)
		return error;

	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL)
		btime = NULL;

	tmpfs_update(vp, atime, mtime, btime, 0);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_update: update the timestamps, et al.
 */
void
tmpfs_update(vnode_t *vp, const struct timespec *acc,
    const struct timespec *mod, const struct timespec *birth, int flags)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct timespec nowtm;

	/* KASSERT(VOP_ISLOCKED(vp)); */

	if (flags & UPDATE_CLOSE) {
		/* XXX Need to do anything special? */
	}
	if ((node->tn_status & TMPFS_NODE_STATUSALL) == 0) {
		return;
	}
	if (birth != NULL) {
		node->tn_birthtime = *birth;
	}
	vfs_timestamp(&nowtm);

	if (node->tn_status & TMPFS_NODE_ACCESSED) {
		node->tn_atime = acc ? *acc : nowtm;
	}
	if (node->tn_status & TMPFS_NODE_MODIFIED) {
		node->tn_mtime = mod ? *mod : nowtm;
	}
	if (node->tn_status & TMPFS_NODE_CHANGED) {
		node->tn_ctime = nowtm;
	}

	node->tn_status &= ~TMPFS_NODE_STATUSALL;
}
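
/*
 * For illustration only: tmpfs_update() maps TMPFS_NODE_ACCESSED to
 * tn_atime, TMPFS_NODE_MODIFIED to tn_mtime and TMPFS_NODE_CHANGED to
 * tn_ctime.  For example, tmpfs_chmod() above sets TMPFS_NODE_CHANGED, so
 * the tmpfs_update() its caller is expected to perform refreshes tn_ctime
 * with the current time and leaves the other timestamps alone.
 */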

int
tmpfs_truncate(vnode_t *vp, off_t length)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	if (length < 0) {
		error = EINVAL;
		goto out;
	}
	if (node->tn_size == length) {
		error = 0;
		goto out;
	}
	error = tmpfs_reg_resize(vp, length);
	if (error == 0) {
		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	}
out:
	tmpfs_update(vp, NULL, NULL, NULL, 0);
	return error;
}