tmpfs_subr.c revision 1.69 1 /* $NetBSD: tmpfs_subr.c,v 1.69 2011/05/25 00:06:45 rmind Exp $ */
2
3 /*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Efficient memory file system: functions for inode and directory entry
35 * construction and destruction.
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.69 2011/05/25 00:06:45 rmind Exp $");
40
41 #include <sys/param.h>
42 #include <sys/dirent.h>
43 #include <sys/event.h>
44 #include <sys/kmem.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/time.h>
48 #include <sys/stat.h>
49 #include <sys/systm.h>
50 #include <sys/vnode.h>
51 #include <sys/kauth.h>
52 #include <sys/atomic.h>
53
54 #include <uvm/uvm.h>
55
56 #include <miscfs/specfs/specdev.h>
57 #include <miscfs/genfs/genfs.h>
58 #include <fs/tmpfs/tmpfs.h>
59 #include <fs/tmpfs/tmpfs_fifoops.h>
60 #include <fs/tmpfs/tmpfs_specops.h>
61 #include <fs/tmpfs/tmpfs_vnops.h>
62
63 /*
64 * tmpfs_alloc_node: allocate a new inode of a specified type and
65 * insert it into the list of specified mount point.
66 */
int
tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid,
    gid_t gid, mode_t mode, tmpfs_node_t *parent, char *target, dev_t rdev,
    tmpfs_node_t **node)
{
	tmpfs_node_t *nnode;

	/* Get a node from the tmpfs pool; NULL indicates the mount's
	 * memory limit has been reached. */
	nnode = tmpfs_node_get(tmp);
	if (nnode == NULL) {
		return ENOSPC;
	}

	/*
	 * Derive the inode number from the node's address within the pool.
	 *
	 * XXX Where the pool is backed by a map larger than (4GB *
	 * sizeof(*nnode)), this may produce duplicate inode numbers
	 * for applications that do not understand 64-bit ino_t.
	 */
	nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
	/* Randomize the generation number. */
	nnode->tn_gen = arc4random();

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_links = 0;
	nnode->tn_lockf = NULL;
	nnode->tn_vnode = NULL;

	/* All four timestamps start out equal to the current time. */
	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_atime;
	nnode->tn_ctime = nnode->tn_atime;
	nnode->tn_mtime = nnode->tn_atime;

	/* Ownership and mode must be fully specified by the caller. */
	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		/* Character/block special device. */
		KASSERT(rdev != VNOVAL);
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;
	case VDIR:
		/*
		 * Directory.  Parent must be specified, unless allocating
		 * the root inode.  The root points at itself as parent.
		 */
		KASSERT(parent || tmp->tm_root == NULL);
		KASSERT(parent != nnode);

		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		nnode->tn_spec.tn_dir.tn_parent =
		    (parent == NULL) ? nnode : parent;
		nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
		/* A directory starts with one link of its own. */
		nnode->tn_links++;
		break;
	case VFIFO:
	case VSOCK:
		/* No type-specific state needed. */
		break;
	case VLNK:
		/* Symbolic link.  Target specifies the file name. */
		KASSERT(target && strlen(target) < MAXPATHLEN);

		nnode->tn_size = strlen(target);
		if (nnode->tn_size == 0) {
			/* Zero-length target: nothing to store. */
			nnode->tn_spec.tn_lnk.tn_link = NULL;
			break;
		}
		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_strname_alloc(tmp, nnode->tn_size);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			/* Roll back the node allocation. */
			tmpfs_node_put(tmp, nnode);
			return ENOSPC;
		}
		/* Note: the stored target is not NUL-terminated. */
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;
	case VREG:
		/* Regular file.  Create an underlying UVM object. */
		nnode->tn_spec.tn_reg.tn_aobj =
		    uao_create(INT32_MAX - PAGE_SIZE, 0);
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
		break;
	default:
		KASSERT(false);
	}

	mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);

	/* Publish the new inode on the per-mount node list. */
	mutex_enter(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
	mutex_exit(&tmp->tm_lock);

	*node = nnode;
	return 0;
}
168
169 /*
170 * tmpfs_free_node: remove the inode from a list in the mount point and
171 * destroy the inode structures.
172 */
void
tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
{
	size_t objsz;

	/* Unlink the inode from the per-mount node list first. */
	mutex_enter(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	mutex_exit(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		/* Release the symlink target string, if any was stored. */
		if (node->tn_size > 0) {
			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
			    node->tn_size);
		}
		break;
	case VREG:
		/*
		 * Calculate the size of inode data, decrease the used-memory
		 * counter, and destroy the underlying UVM object (if any).
		 */
		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
		if (objsz != 0) {
			tmpfs_mem_decr(tmp, objsz);
		}
		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		}
		break;
	case VDIR:
		/* KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); */
		KASSERT(node->tn_spec.tn_dir.tn_parent || node == tmp->tm_root);
		break;
	default:
		break;
	}

	mutex_destroy(&node->tn_vlock);
	tmpfs_node_put(tmp, node);
}
213
214 /*
215 * tmpfs_alloc_vp: allocate or reclaim a vnode for a specified inode.
216 *
217 * => Returns vnode (*vpp) locked.
218 */
int
tmpfs_alloc_vp(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
{
	vnode_t *vp;
	int error;
again:
	/* If there is already a vnode, try to reclaim it. */
	mutex_enter(&node->tn_vlock);
	if ((vp = node->tn_vnode) != NULL) {
		/*
		 * Take v_interlock before dropping tn_vlock, then hand
		 * the vnode to vget() (which is expected to consume the
		 * interlock — NOTE(review): confirm against vget(9)).
		 */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&node->tn_vlock);
		error = vget(vp, LK_EXCLUSIVE);
		if (error == ENOENT) {
			/* Vnode was reclaimed meanwhile; start over. */
			goto again;
		}
		*vpp = vp;
		return error;
	}

	/* Get a new vnode and associate it with our inode. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error) {
		mutex_exit(&node->tn_vlock);
		return error;
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		/* Special devices get their own vnode operations vector. */
		vp->v_op = tmpfs_specop_p;
		spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
		break;
	case VDIR:
		/* A directory that is its own parent is the root. */
		vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VV_ROOT : 0;
		break;
	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;
	case VLNK:
	case VREG:
	case VSOCK:
		break;
	default:
		KASSERT(false);
	}

	/* Publish the association while still holding tn_vlock. */
	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	mutex_exit(&node->tn_vlock);

	KASSERT(VOP_ISLOCKED(vp));
	*vpp = vp;
	return 0;
}
279
280 /*
281 * tmpfs_free_vp: destroys the association between the vnode and the
282 * inode it references.
283 */
284 void
285 tmpfs_free_vp(vnode_t *vp)
286 {
287 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
288
289 mutex_enter(&node->tn_vlock);
290 node->tn_vnode = NULL;
291 mutex_exit(&node->tn_vlock);
292 vp->v_data = NULL;
293 }
294
295 /*
296 * tmpfs_alloc_file: allocate a new file of specified type and adds it
297 * into the parent directory.
298 *
299 * => Credentials of the caller are used.
300 */
int
tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node, *parent;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	*vpp = NULL;

	/* Check for the maximum number of links limit. */
	if (vap->va_type == VDIR) {
		/* Creating a subdirectory adds a ".." link to dnode. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}
		parent = dnode;
	} else {
		parent = NULL;
	}

	/*
	 * Allocate a node that represents the new file.  The uid comes
	 * from the caller's credentials; the gid is taken from the
	 * parent directory.
	 */
	error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node);
	if (error)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error) {
		/* Roll back the node allocation. */
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error) {
		/* Roll back both the dirent and the node. */
		tmpfs_free_dirent(tmp, de, true);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Attach directory entry into the directory inode. */
	tmpfs_dir_attach(dvp, de);
	if (vap->va_type == VDIR) {
		/* Account for the new child's ".." reference. */
		dnode->tn_links++;
		KASSERT(dnode->tn_links <= LINK_MAX);
		VN_KNOTE(dvp, NOTE_LINK);
	}
out:
	/* The parent directory vnode is released on all paths. */
	vput(dvp);
	return error;
}
359
360 /*
361 * tmpfs_alloc_dirent: allocates a new directory entry for the inode.
362 *
363 * The link count of node is increased by one to reflect the new object
364 * referencing it. This takes care of notifying kqueue listeners about
365 * this change.
366 */
367 int
368 tmpfs_alloc_dirent(tmpfs_mount_t *tmp, tmpfs_node_t *node,
369 const char *name, uint16_t len, tmpfs_dirent_t **de)
370 {
371 tmpfs_dirent_t *nde;
372
373 nde = tmpfs_dirent_get(tmp);
374 if (nde == NULL)
375 return ENOSPC;
376
377 nde->td_name = tmpfs_strname_alloc(tmp, len);
378 if (nde->td_name == NULL) {
379 tmpfs_dirent_put(tmp, nde);
380 return ENOSPC;
381 }
382 nde->td_namelen = len;
383 memcpy(nde->td_name, name, len);
384 nde->td_node = node;
385
386 if (node != TMPFS_NODE_WHITEOUT) {
387 node->tn_links++;
388 if (node->tn_links > 1 && node->tn_vnode != NULL)
389 VN_KNOTE(node->tn_vnode, NOTE_LINK);
390 }
391
392 *de = nde;
393 return 0;
394 }
395
396 /*
397 * tmpfs_free_dirent: free a directory entry.
398 *
399 * => It is the caller's responsibility to destroy the referenced inode.
400 * => The link count of inode is decreased by one to reflect the removal of
401 * an object that referenced it. This only happens if 'node_exists' is true;
402 * otherwise the function will not access the node referred to by the
403 * directory entry, as it may already have been released from the outside.
404 *
405 * Interested parties (kqueue) are notified of the link count change; note
406 * that this can include both the node pointed to by the directory entry
407 * as well as its parent.
408 */
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de, bool node_exists)
{

	/* A whiteout entry carries no real inode; nothing to unlink. */
	if (node_exists && de->td_node != TMPFS_NODE_WHITEOUT) {
		tmpfs_node_t *node = de->td_node;

		KASSERT(node->tn_links > 0);
		node->tn_links--;
		/* Last link gone means the file is being deleted. */
		if (node->tn_vnode != NULL) {
			VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
			    NOTE_DELETE : NOTE_LINK);
		}
		/* A directory's removal also affects its parent's links. */
		if (node->tn_type == VDIR) {
			VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
			    NOTE_LINK);
		}
	}
	/* Release the name string and the entry itself. */
	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
	tmpfs_dirent_put(tmp, de);
}
430
431 /*
432 * tmpfs_dir_attach: attach the directory entry to the specified vnode.
433 *
434 * => The link count of inode is not changed; done by tmpfs_alloc_dirent().
435 * => Triggers NOTE_WRITE event here.
436 */
437 void
438 tmpfs_dir_attach(vnode_t *vp, tmpfs_dirent_t *de)
439 {
440 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(vp);
441
442 KASSERT(VOP_ISLOCKED(vp));
443
444 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
445 dnode->tn_size += sizeof(tmpfs_dirent_t);
446 dnode->tn_status |= TMPFS_NODE_STATUSALL;
447 uvm_vnp_setsize(vp, dnode->tn_size);
448 VN_KNOTE(vp, NOTE_WRITE);
449 }
450
451 /*
 * tmpfs_dir_detach: detach the directory entry from the specified vnode.
453 *
454 * => The link count of inode is not changed; done by tmpfs_free_dirent().
455 * => Triggers NOTE_WRITE event here.
456 */
457 void
458 tmpfs_dir_detach(vnode_t *vp, tmpfs_dirent_t *de)
459 {
460 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(vp);
461
462 KASSERT(VOP_ISLOCKED(vp));
463
464 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
465 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
466 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
467 }
468 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
469
470 dnode->tn_size -= sizeof(tmpfs_dirent_t);
471 dnode->tn_status |= TMPFS_NODE_STATUSALL;
472 uvm_vnp_setsize(vp, dnode->tn_size);
473 VN_KNOTE(vp, NOTE_WRITE);
474 }
475
476 /*
477 * tmpfs_dir_lookup: find a directory entry in the specified inode.
478 *
479 * Note that the . and .. components are not allowed as they do not
480 * physically exist within directories.
481 */
482 tmpfs_dirent_t *
483 tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
484 {
485 const char *name = cnp->cn_nameptr;
486 const uint16_t nlen = cnp->cn_namelen;
487 tmpfs_dirent_t *de;
488
489 KASSERT(VOP_ISLOCKED(node->tn_vnode));
490 TMPFS_VALIDATE_DIR(node);
491 KASSERT(nlen != 1 || !(name[0] == '.'));
492 KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
493
494 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
495 if (de->td_namelen != nlen)
496 continue;
497 if (memcmp(de->td_name, name, nlen) != 0)
498 continue;
499 break;
500 }
501 node->tn_status |= TMPFS_NODE_ACCESSED;
502 return de;
503 }
504
505 /*
506 * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a
507 * '.' entry for the given directory and returns it in the uio space.
508 */
509 int
510 tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio)
511 {
512 struct dirent *dentp;
513 int error;
514
515 TMPFS_VALIDATE_DIR(node);
516 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
517
518 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
519 dentp->d_fileno = node->tn_id;
520 dentp->d_type = DT_DIR;
521 dentp->d_namlen = 1;
522 dentp->d_name[0] = '.';
523 dentp->d_name[1] = '\0';
524 dentp->d_reclen = _DIRENT_SIZE(dentp);
525
526 if (dentp->d_reclen > uio->uio_resid)
527 error = -1;
528 else {
529 error = uiomove(dentp, dentp->d_reclen, uio);
530 if (error == 0)
531 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
532 }
533 node->tn_status |= TMPFS_NODE_ACCESSED;
534 kmem_free(dentp, sizeof(struct dirent));
535 return error;
536 }
537
538 /*
539 * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a
540 * '..' entry for the given directory and returns it in the uio space.
541 */
542 int
543 tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio)
544 {
545 struct dirent *dentp;
546 int error;
547
548 TMPFS_VALIDATE_DIR(node);
549 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
550
551 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
552 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
553 dentp->d_type = DT_DIR;
554 dentp->d_namlen = 2;
555 dentp->d_name[0] = '.';
556 dentp->d_name[1] = '.';
557 dentp->d_name[2] = '\0';
558 dentp->d_reclen = _DIRENT_SIZE(dentp);
559
560 if (dentp->d_reclen > uio->uio_resid)
561 error = -1;
562 else {
563 error = uiomove(dentp, dentp->d_reclen, uio);
564 if (error == 0) {
565 tmpfs_dirent_t *de;
566
567 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
568 if (de == NULL)
569 uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
570 else
571 uio->uio_offset = tmpfs_dircookie(de);
572 }
573 }
574 node->tn_status |= TMPFS_NODE_ACCESSED;
575 kmem_free(dentp, sizeof(struct dirent));
576 return error;
577 }
578
579 /*
580 * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie.
581 */
582 tmpfs_dirent_t *
583 tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie)
584 {
585 tmpfs_dirent_t *de;
586
587 KASSERT(VOP_ISLOCKED(node->tn_vnode));
588
589 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
590 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
591 return node->tn_spec.tn_dir.tn_readdir_lastp;
592 }
593 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
594 if (tmpfs_dircookie(de) == cookie) {
595 break;
596 }
597 }
598 return de;
599 }
600
601 /*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
603 *
604 * => Returns as much directory entries as can fit in the uio space.
605 * => The read starts at uio->uio_offset.
606 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp)
{
	tmpfs_dirent_t *de;
	struct dirent *dentp;
	off_t startcookie;
	int error;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);

	/*
	 * Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry.
	 */
	startcookie = uio->uio_offset;
	/* "." and ".." are produced by separate helpers, never here. */
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
		return 0;
	} else {
		de = tmpfs_dir_lookupbycookie(node, startcookie);
	}
	if (de == NULL) {
		/* Stale or bogus cookie supplied by the caller. */
		return EINVAL;
	}

	/*
	 * Read as much entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust uio space.
	 */
	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
	do {
		/*
		 * Create a dirent structure representing the current
		 * inode and fill it.
		 */
		if (de->td_node == TMPFS_NODE_WHITEOUT) {
			/* Whiteouts have no backing inode. */
			dentp->d_fileno = 1;
			dentp->d_type = DT_WHT;
		} else {
			dentp->d_fileno = de->td_node->tn_id;
			switch (de->td_node->tn_type) {
			case VBLK:
				dentp->d_type = DT_BLK;
				break;
			case VCHR:
				dentp->d_type = DT_CHR;
				break;
			case VDIR:
				dentp->d_type = DT_DIR;
				break;
			case VFIFO:
				dentp->d_type = DT_FIFO;
				break;
			case VLNK:
				dentp->d_type = DT_LNK;
				break;
			case VREG:
				dentp->d_type = DT_REG;
				break;
			case VSOCK:
				dentp->d_type = DT_SOCK;
				break;
			default:
				KASSERT(false);
			}
		}
		dentp->d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dentp->d_name));
		memcpy(dentp->d_name, de->td_name, de->td_namelen);
		dentp->d_name[de->td_namelen] = '\0';
		dentp->d_reclen = _DIRENT_SIZE(dentp);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (dentp->d_reclen > uio->uio_resid) {
			/* -1 is not an errno: same "did not fit" protocol
			 * as tmpfs_dir_getdotdent() above. */
			error = -1;
			break;
		}

		/*
		 * Copy the new dirent structure into the output buffer and
		 * advance pointers.
		 */
		error = uiomove(dentp, dentp->d_reclen, uio);

		(*cntp)++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset and the readdir cache for the next call. */
	if (de == NULL) {
		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
		node->tn_spec.tn_dir.tn_readdir_lastn = 0;
		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	} else {
		node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
		    tmpfs_dircookie(de);
		node->tn_spec.tn_dir.tn_readdir_lastp = de;
	}
	node->tn_status |= TMPFS_NODE_ACCESSED;
	kmem_free(dentp, sizeof(struct dirent));
	return error;
}
713
714 /*
715 * tmpfs_reg_resize: resize the underlying UVM object associated with the
716 * specified regular file.
717 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	size_t newpages, oldpages;
	off_t oldsize;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	oldsize = node->tn_size;
	oldpages = round_page(oldsize) >> PAGE_SHIFT;
	newpages = round_page(newsize) >> PAGE_SHIFT;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);

	if (newpages > oldpages) {
		/* Increase the used-memory counter if getting extra pages. */
		if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) {
			return ENOSPC;
		}
	} else if (newsize < oldsize) {
		/* Bytes from newsize to the end of its (partial) page. */
		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;

		/* Zero out the truncated part of the last page. */
		uvm_vnp_zerorange(vp, newsize, zerolen);
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);

	/*
	 * Free "backing store".
	 */
	if (newpages < oldpages) {
		struct uvm_object *uobj;

		uobj = node->tn_spec.tn_reg.tn_aobj;

		/* Drop swap slots of the now-unused page range. */
		mutex_enter(&uobj->vmobjlock);
		uao_dropswap_range(uobj, newpages, oldpages);
		mutex_exit(&uobj->vmobjlock);

		/* Decrease the used-memory counter. */
		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
	}
	if (newsize > oldsize) {
		VN_KNOTE(vp, NOTE_EXTEND);
	}
	return 0;
}
770
771 /*
772 * tmpfs_chflags: change flags of the given vnode.
773 *
774 * => Caller should perform tmpfs_update().
775 */
int
tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
	int error, fs_decision = 0;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* The file system's own opinion: non-owners are refused. */
	if (kauth_cred_geteuid(cred) != node->tn_uid) {
		fs_decision = EACCES;
	}

	/*
	 * If the new flags have non-user flags that are different than
	 * those on the node, we need special permission to change them.
	 */
	if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
		action |= KAUTH_VNODE_WRITE_SYSFLAGS;
		if (!fs_decision) {
			fs_decision = EPERM;
		}
	}

	/*
	 * Indicate that this node's flags have system attributes in them if
	 * that's the case.
	 */
	if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
		action |= KAUTH_VNODE_HAS_SYSFLAGS;
	}

	/* Let kauth decide, taking the fs opinion as a fallback. */
	error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision);
	if (error)
		return error;

	/*
	 * Set the flags. If we're not setting non-user flags, be careful not
	 * to overwrite them.
	 *
	 * XXX: Can't we always assign here? if the system flags are different,
	 *	the code above should catch attempts to change them without
	 *	proper permissions, and if we're here it means it's okay to
	 *	change them...
	 */
	if ((action & KAUTH_VNODE_WRITE_SYSFLAGS) == 0) {
		/* Clear all user-settable flags and re-set them. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	} else {
		node->tn_flags = flags;
	}
	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}
836
837 /*
838 * tmpfs_chmod: change access mode on the given vnode.
839 *
840 * => Caller should perform tmpfs_update().
841 */
842 int
843 tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l)
844 {
845 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
846 int error;
847
848 KASSERT(VOP_ISLOCKED(vp));
849
850 /* Disallow this operation if the file system is mounted read-only. */
851 if (vp->v_mount->mnt_flag & MNT_RDONLY)
852 return EROFS;
853
854 /* Immutable or append-only files cannot be modified, either. */
855 if (node->tn_flags & (IMMUTABLE | APPEND))
856 return EPERM;
857
858 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
859 NULL, genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid, mode));
860 if (error) {
861 return error;
862 }
863 node->tn_mode = (mode & ALLPERMS);
864 node->tn_status |= TMPFS_NODE_CHANGED;
865 VN_KNOTE(vp, NOTE_ATTRIB);
866 return 0;
867 }
868
869 /*
870 * tmpfs_chown: change ownership of the given vnode.
871 *
872 * => At least one of uid or gid must be different than VNOVAL.
873 * => Attribute is unchanged for VNOVAL case.
874 * => Caller should perform tmpfs_update().
875 */
876 int
877 tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l)
878 {
879 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
880 int error;
881
882 KASSERT(VOP_ISLOCKED(vp));
883
884 /* Assign default values if they are unknown. */
885 KASSERT(uid != VNOVAL || gid != VNOVAL);
886 if (uid == VNOVAL) {
887 uid = node->tn_uid;
888 }
889 if (gid == VNOVAL) {
890 gid = node->tn_gid;
891 }
892
893 /* Disallow this operation if the file system is mounted read-only. */
894 if (vp->v_mount->mnt_flag & MNT_RDONLY)
895 return EROFS;
896
897 /* Immutable or append-only files cannot be modified, either. */
898 if (node->tn_flags & (IMMUTABLE | APPEND))
899 return EPERM;
900
901 error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
902 NULL, genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid,
903 gid));
904 if (error) {
905 return error;
906 }
907 node->tn_uid = uid;
908 node->tn_gid = gid;
909 node->tn_status |= TMPFS_NODE_CHANGED;
910 VN_KNOTE(vp, NOTE_ATTRIB);
911 return 0;
912 }
913
914 /*
915 * tmpfs_chsize: change size of the given vnode.
916 */
917 int
918 tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l)
919 {
920 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
921
922 KASSERT(VOP_ISLOCKED(vp));
923
924 /* Decide whether this is a valid operation based on the file type. */
925 switch (vp->v_type) {
926 case VDIR:
927 return EISDIR;
928 case VREG:
929 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
930 return EROFS;
931 }
932 break;
933 case VBLK:
934 case VCHR:
935 case VFIFO:
936 /*
937 * Allow modifications of special files even if in the file
938 * system is mounted read-only (we are not modifying the
939 * files themselves, but the objects they represent).
940 */
941 return 0;
942 default:
943 return EOPNOTSUPP;
944 }
945
946 /* Immutable or append-only files cannot be modified, either. */
947 if (node->tn_flags & (IMMUTABLE | APPEND)) {
948 return EPERM;
949 }
950
951 /* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */
952 return tmpfs_truncate(vp, size);
953 }
954
955 /*
956 * tmpfs_chtimes: change access and modification times for vnode.
957 */
958 int
959 tmpfs_chtimes(vnode_t *vp, const struct timespec *atime,
960 const struct timespec *mtime, const struct timespec *btime,
961 int vaflags, kauth_cred_t cred, lwp_t *l)
962 {
963 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
964 int error;
965
966 KASSERT(VOP_ISLOCKED(vp));
967
968 /* Disallow this operation if the file system is mounted read-only. */
969 if (vp->v_mount->mnt_flag & MNT_RDONLY)
970 return EROFS;
971
972 /* Immutable or append-only files cannot be modified, either. */
973 if (node->tn_flags & (IMMUTABLE | APPEND))
974 return EPERM;
975
976 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
977 genfs_can_chtimes(vp, vaflags, node->tn_uid, cred));
978 if (error)
979 return error;
980
981 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
982 node->tn_status |= TMPFS_NODE_ACCESSED;
983
984 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
985 node->tn_status |= TMPFS_NODE_MODIFIED;
986
987 if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL)
988 btime = NULL;
989
990 tmpfs_update(vp, atime, mtime, btime, 0);
991 VN_KNOTE(vp, NOTE_ATTRIB);
992 return 0;
993 }
994
995 /*
996 * tmpfs_update: update timestamps, et al.
997 */
998 void
999 tmpfs_update(vnode_t *vp, const struct timespec *acc,
1000 const struct timespec *mod, const struct timespec *birth, int flags)
1001 {
1002 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1003 struct timespec nowtm;
1004
1005 KASSERT(VOP_ISLOCKED(vp));
1006
1007 if (flags & UPDATE_CLOSE) {
1008 /* XXX Need to do anything special? */
1009 }
1010 if ((node->tn_status & TMPFS_NODE_STATUSALL) == 0) {
1011 return;
1012 }
1013 if (birth != NULL) {
1014 node->tn_birthtime = *birth;
1015 }
1016 vfs_timestamp(&nowtm);
1017
1018 if (node->tn_status & TMPFS_NODE_ACCESSED) {
1019 node->tn_atime = acc ? *acc : nowtm;
1020 }
1021 if (node->tn_status & TMPFS_NODE_MODIFIED) {
1022 node->tn_mtime = mod ? *mod : nowtm;
1023 }
1024 if (node->tn_status & TMPFS_NODE_CHANGED) {
1025 node->tn_ctime = nowtm;
1026 }
1027
1028 node->tn_status &= ~TMPFS_NODE_STATUSALL;
1029 }
1030
1031 int
1032 tmpfs_truncate(vnode_t *vp, off_t length)
1033 {
1034 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1035 int error;
1036
1037 if (length < 0) {
1038 error = EINVAL;
1039 goto out;
1040 }
1041 if (node->tn_size == length) {
1042 error = 0;
1043 goto out;
1044 }
1045 error = tmpfs_reg_resize(vp, length);
1046 if (error == 0) {
1047 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1048 }
1049 out:
1050 tmpfs_update(vp, NULL, NULL, NULL, 0);
1051 return error;
1052 }
1053