tmpfs_subr.c revision 1.3 1 /* $NetBSD: tmpfs_subr.c,v 1.3 2005/09/12 16:55:01 christos Exp $ */
2
3 /*
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Efficient memory file system supporting functions.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.3 2005/09/12 16:55:01 christos Exp $");
45
46 #include <sys/param.h>
47 #include <sys/dirent.h>
48 #include <sys/event.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/namei.h>
52 #include <sys/time.h>
53 #include <sys/stat.h>
54 #include <sys/systm.h>
55 #include <sys/swap.h>
56 #include <sys/vnode.h>
57
58 #include <uvm/uvm.h>
59
60 #include <miscfs/specfs/specdev.h>
61 #include <fs/tmpfs/tmpfs.h>
62 #include <fs/tmpfs/tmpfs_fifoops.h>
63 #include <fs/tmpfs/tmpfs_specops.h>
64 #include <fs/tmpfs/tmpfs_vnops.h>
65
66 /* --------------------------------------------------------------------- */
67
/* Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
 *
 * If 'type' is VDIR, 'parent' names the directory the new node hangs
 * from; a NULL 'parent' is only valid while allocating the file system's
 * root directory.  If 'type' is VLNK, 'target' holds the symbolic link's
 * contents.  If 'type' is VBLK or VCHR, 'rdev' holds the device number.
 * The 'p' argument is accepted for interface symmetry but is not used
 * in this function.
 *
 * Returns zero on success, in which case '*node' points to the newly
 * allocated node, or ENOSPC if the file system is out of nodes or
 * memory. */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	KASSERT(IFF(type == VLNK, target != NULL));
	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	/* Nodes are recycled through the tm_nodes_avail list; only when it
	 * is empty do we take a fresh one from the pool, assigning it the
	 * next unused inode number.  Recycled nodes keep their inode
	 * number but bump their generation count so stale references can
	 * be detected. */
	nnode = NULL;
	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
			return ENOSPC;

		nnode =
		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
		if (nnode == NULL)
			return ENOSPC;
		nnode->tn_id = tmp->tm_nodes_last++;
		nnode->tn_gen = 0;
	} else {
		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
		LIST_REMOVE(nnode, tn_entries);
		nnode->tn_gen++;
	}
	KASSERT(nnode != NULL);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_links = 0;
	/* All four timestamps start out as "now". */
	(void)nanotime(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_vnode = NULL;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_rdev = rdev;
		break;

	case VDIR:
		TAILQ_INIT(&nnode->tn_dir);
		/* The root directory is its own parent. */
		nnode->tn_parent = (parent == NULL) ? nnode : parent;
		nnode->tn_readdir_lastn = 0;
		nnode->tn_readdir_lastp = NULL;
		/* Account for the '.' entry on ourselves and the '..'
		 * reference the new directory holds on its parent. */
		nnode->tn_links++;
		nnode->tn_parent->tn_links++;
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		KASSERT(strlen(target) < MAXPATHLEN);
		/* XXX(review): only strlen(target) bytes are reserved, but
		 * the strcpy below also writes the terminating NUL --
		 * a one-byte overflow of the pool buffer.  tmpfs_free_node
		 * depends on that NUL (it calls strlen on tn_link), so a
		 * fix needs a coordinated change in both routines (e.g.
		 * store the link unterminated and release it by tn_size). */
		nnode->tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool,
		    strlen(target), 0);
		if (nnode->tn_link == NULL) {
			/* Mark the node VNON so tmpfs_free_node skips any
			 * type-specific cleanup. */
			nnode->tn_type = VNON;
			tmpfs_free_node(tmp, nnode);
			return ENOSPC;
		}
		strcpy(nnode->tn_link, target);
		nnode->tn_size = strlen(target);
		break;

	case VREG:
		/* The backing anonymous object is created lazily by
		 * tmpfs_reg_resize when data is first stored. */
		nnode->tn_aobj = NULL;
		nnode->tn_aobj_pages = 0;
		nnode->tn_va = 0;
		break;

	default:
		KASSERT(0);
	}

	*node = nnode;
	return 0;
}
165
166 /* --------------------------------------------------------------------- */
167
/* Releases the node 'node' back to the mount point 'tmp', disposing of
 * any type-specific resources it holds.  The node keeps its inode number
 * and generation count (the latter is bumped when it is recycled) and is
 * placed on the free list for later reuse by tmpfs_alloc_node. */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	ino_t id;
	unsigned long gen;
	size_t pages;

	switch (node->tn_type) {
	case VNON:
		/* Do not do anything.  VNON is provided to let the
		 * allocation routine clean itself easily by avoiding
		 * duplicating code in it. */
		/* FALLTHROUGH */
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VDIR:
		/* FALLTHROUGH */
	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		pages = 0;
		break;

	case VLNK:
		/* NOTE(review): this relies on tn_link being NUL-terminated,
		 * yet tmpfs_alloc_node only reserved strlen(target) bytes
		 * for it (see the off-by-one note there).  The length given
		 * back to the pool does match the length requested at
		 * allocation time. */
		tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_link,
		    strlen(node->tn_link));
		pages = 0;
		break;

	case VREG:
		/* Drop the anonymous object backing the file, if any, and
		 * remember how many pages it was consuming so the mount's
		 * accounting can be corrected below. */
		if (node->tn_aobj != NULL)
			uao_detach(node->tn_aobj);
		pages = node->tn_aobj_pages;
		break;

	default:
		KASSERT(0);
		pages = 0; /* Shut up gcc when !DIAGNOSTIC. */
		break;
	}

	tmp->tm_pages_used -= pages;

	/* Wipe the node while preserving its identity (inode number and
	 * generation count), then move it from the used list to the free
	 * list for reuse. */
	LIST_REMOVE(node, tn_entries);
	id = node->tn_id;
	gen = node->tn_gen;
	memset(node, 0, sizeof(struct tmpfs_node));
	node->tn_id = id;
	node->tn_type = VNON;
	node->tn_gen = gen;
	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
}
222
223 /* --------------------------------------------------------------------- */
224
225 int
226 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
227 const char *name, uint16_t len, struct tmpfs_dirent **de)
228 {
229 struct tmpfs_dirent *nde;
230
231 nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
232 if (nde == NULL)
233 return ENOSPC;
234
235 nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
236 if (nde->td_name == NULL) {
237 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
238 return ENOSPC;
239 }
240 nde->td_namelen = len;
241 memcpy(nde->td_name, name, len);
242 nde->td_node = node;
243
244 node->tn_links++;
245 *de = nde;
246
247 return 0;
248 }
249
250 /* --------------------------------------------------------------------- */
251
252 void
253 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
254 boolean_t node_exists)
255 {
256 if (node_exists) {
257 struct tmpfs_node *node;
258
259 node = de->td_node;
260
261 KASSERT(node->tn_links > 0);
262 node->tn_links--;
263 }
264
265 tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
266 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
267 }
268
269 /* --------------------------------------------------------------------- */
270
/* Obtains a locked vnode for the tmpfs node 'node' living in the mount
 * point 'mp'.  If the node already has a vnode attached it is reused;
 * otherwise a new one is allocated and initialized according to the
 * node's type.  On success zero is returned and '*vpp' holds the locked
 * vnode; on failure '*vpp' is NULL. */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *nvp;
	struct vnode *vp;

	vp = NULL;

	if (node->tn_vnode != NULL) {
		vp = node->tn_vnode;
		/* NOTE(review): vget's return value is ignored here; if the
		 * vnode is being reclaimed concurrently this may need
		 * handling -- verify against the vnode life cycle rules. */
		vget(vp, LK_EXCLUSIVE | LK_RETRY);
		error = 0;
		goto out;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0)
		goto out;
	KASSERT(vp != NULL);

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		/* Give the unused vnode back rather than letting it go
		 * through the normal reclaim path. */
		vp->v_data = NULL;
		ungetnewvnode(vp);
		vp = NULL;
		goto out;
	}

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		/* The device may already be represented by another (alias)
		 * vnode; if so, move our node over to the alias and get rid
		 * of the vnode we just set up. */
		nvp = checkalias(vp, node->tn_rdev, mp);
		if (nvp != NULL) {
			/* Discard unneeded vnode, but save its inode. */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;

			/* XXX spec_vnodeops has no locking, so we have to
			 * do it explicitly. */
			VOP_UNLOCK(vp, 0);
			vp->v_op = spec_vnodeop_p;
			vp->v_flag &= ~VLOCKSWORK;
			vrele(vp);
			vgone(vp);

			/* Reinitialize aliased node. */
			vp = nvp;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			if (error != 0) {
				vp->v_data = NULL;
				vp = NULL;
				goto out;
			}
		}
		break;

	case VDIR:
		/* alloc_node makes the root directory its own parent, so
		 * this marks exactly the file system root with VROOT. */
		vp->v_flag = node->tn_parent == node ? VROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);

	error = 0;

out:
	/* Publish the result both in the node and in the caller's pointer;
	 * on failure vp is NULL, which also clears tn_vnode. */
	*vpp = node->tn_vnode = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}
366
367 /* --------------------------------------------------------------------- */
368
369 void
370 tmpfs_free_vp(struct vnode *vp)
371 {
372 struct tmpfs_node *node;
373
374 node = VP_TO_TMPFS_NODE(vp);
375
376 node->tn_vnode = NULL;
377 vp->v_data = NULL;
378 }
379
380 /* --------------------------------------------------------------------- */
381
/* Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code. */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	/* The directory vnode must come in locked and the component name
	 * must still own its pathname buffer. */
	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}

		parent = dnode;
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
	    cnp->cn_proc, &node);
	if (error != 0)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		/* Undo the node allocation done just above. */
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error != 0) {
		/* Undo both previous allocations, newest first. */
		tmpfs_free_dirent(tmp, de, TRUE);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	tmpfs_dir_attach(dvp, de);
	VN_KNOTE(dvp, NOTE_WRITE);

out:
	/* The pathname buffer is kept only when the caller requested it
	 * (SAVESTART) and the operation succeeded; the parent directory
	 * is unlocked and released in all cases, as the VOP protocol for
	 * create/mknod/mkdir requires. */
	if (error != 0 || !(cnp->cn_flags & SAVESTART))
		PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);

	/* NOTE(review): dvp is inspected after vput dropped our reference;
	 * this is diagnostic-only, but it assumes the vnode has not been
	 * recycled in the meantime -- verify. */
	KASSERT(!VOP_ISLOCKED(dvp));
	KASSERT(IFF(error == 0, *vpp != NULL));

	return error;
}
463
464 /* --------------------------------------------------------------------- */
465
466 void
467 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
468 {
469 struct tmpfs_node *dnode;
470
471 dnode = VP_TO_TMPFS_DIR(vp);
472
473 TAILQ_INSERT_TAIL(&dnode->tn_dir, de, td_entries);
474 dnode->tn_size += sizeof(struct tmpfs_dirent);
475 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
476 TMPFS_NODE_MODIFIED;
477 uvm_vnp_setsize(vp, dnode->tn_size);
478 }
479
480 /* --------------------------------------------------------------------- */
481
482 void
483 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
484 {
485 struct tmpfs_node *dnode;
486
487 dnode = VP_TO_TMPFS_DIR(vp);
488
489 TAILQ_REMOVE(&dnode->tn_dir, de, td_entries);
490 dnode->tn_size -= sizeof(struct tmpfs_dirent);
491 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
492 TMPFS_NODE_MODIFIED;
493 uvm_vnp_setsize(vp, dnode->tn_size);
494 }
495
496 /* --------------------------------------------------------------------- */
497
498 struct tmpfs_dirent *
499 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
500 {
501 boolean_t found;
502 struct tmpfs_dirent *de;
503
504 KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
505 KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
506 cnp->cn_nameptr[1] == '.')));
507 TMPFS_VALIDATE_DIR(node);
508
509 node->tn_status |= TMPFS_NODE_ACCESSED;
510
511 found = 0;
512 TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
513 KASSERT(cnp->cn_namelen < 0xffff);
514 if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
515 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
516 found = 1;
517 break;
518 }
519 }
520
521 return found ? de : NULL;
522 }
523
524 /* --------------------------------------------------------------------- */
525
526 /* Helper function for tmpfs_readdir. Creates a '.' entry for the given
527 * directory and returns it in the uio space. The function returns 0
528 * on success, -1 if there was not enough space in the uio structure to
529 * hold the directory entry or an appropriate error code if another
530 * error happens. */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	/* '.' occupies virtual slot 0 of every directory. */
	KASSERT(uio->uio_offset == 0);

	dent.d_fileno = node->tn_id;
	dent.d_type = DT_DIR;
	dent.d_namlen = 1;
	dent.d_name[0] = '.';
	dent.d_name[1] = '\0';
	dent.d_reclen = _DIRENT_SIZE(&dent);

	/* -1 is the special "does not fit" indicator for the caller. */
	if (dent.d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error == 0)
			/* uiomove advanced uio_offset by d_reclen, but
			 * tmpfs directory offsets are virtual, counted in
			 * sizeof(struct tmpfs_dirent) units per entry;
			 * compensate so the offset lands exactly on the
			 * next slot. */
			uio->uio_offset += sizeof(struct tmpfs_dirent) - \
			    dent.d_reclen;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}
560
561 /* --------------------------------------------------------------------- */
562
563 /* Helper function for tmpfs_readdir. Creates a '..' entry for the given
564 * directory and returns it in the uio space. The function returns 0
565 * on success, -1 if there was not enough space in the uio structure to
566 * hold the directory entry or an appropriate error code if another
567 * error happens. */
568 int
569 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
570 {
571 int error;
572 struct dirent dent;
573
574 TMPFS_VALIDATE_DIR(node);
575 KASSERT(uio->uio_offset == sizeof(struct tmpfs_dirent));
576
577 dent.d_fileno = node->tn_id;
578 dent.d_type = DT_DIR;
579 dent.d_namlen = 2;
580 dent.d_name[0] = '.';
581 dent.d_name[1] = '.';
582 dent.d_name[2] = '\0';
583 dent.d_reclen = _DIRENT_SIZE(&dent);
584
585 if (dent.d_reclen > uio->uio_resid)
586 error = -1;
587 else {
588 error = uiomove(&dent, dent.d_reclen, uio);
589 if (error == 0)
590 uio->uio_offset += sizeof(struct tmpfs_dirent) - \
591 dent.d_reclen;
592 }
593
594 node->tn_status |= TMPFS_NODE_ACCESSED;
595
596 return error;
597 }
598
599 /* --------------------------------------------------------------------- */
600
601 /* Helper function for tmpfs_readdir. Returns as much directory entries
602 * as can fit in the uio space. The read starts at uio->uio_offset.
603 * The function returns 0 on success, -1 if there was not enough space
604 * in the uio structure to hold the directory entry or an appropriate
605 * error code if another error happens. */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	long cnt, startcnt;
	struct tmpfs_dirent *de;

	TMPFS_VALIDATE_DIR(node);
	/* Directory offsets are virtual: slot 0 is '.', slot 1 is '..'
	 * and real entries start at slot 2; every slot is counted as
	 * sizeof(struct tmpfs_dirent) "bytes" regardless of the size of
	 * the dirent structure actually returned to the caller. */
	KASSERT(uio->uio_offset % sizeof(struct tmpfs_dirent) == 0);
	KASSERT(uio->uio_offset >= sizeof(struct tmpfs_dirent) * 2);
	KASSERT(uio->uio_offset < node->tn_size +
	    sizeof(struct tmpfs_dirent) * 2);

	/* Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry.
	 * NOTE(review): the cache is keyed only by slot number; verify it
	 * is invalidated whenever entries are removed from the directory,
	 * or tn_readdir_lastp could be stale. */
	de = NULL;
	startcnt = uio->uio_offset / sizeof(struct tmpfs_dirent) - 2;
	if (startcnt == node->tn_readdir_lastn && \
	    node->tn_readdir_lastp != NULL) {
		cnt = node->tn_readdir_lastn;
		de = node->tn_readdir_lastp;
	} else {
		cnt = 0;
		de = TAILQ_FIRST(&node->tn_dir);
		while (cnt < startcnt) {
			cnt++;
			de = TAILQ_NEXT(de, td_entries);

			/* Ensure that if we have not found the desired item,
			 * there are more entries in the directory to continue
			 * the search. */
			KASSERT(IMPLIES(de == TAILQ_LAST(&node->tn_dir,
			    tmpfs_dir), cnt == startcnt));
		}
	}
	KASSERT(cnt == startcnt);
	KASSERT(de != NULL);

	/* Read as much entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		struct dirent d;

		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		d.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			d.d_type = DT_BLK;
			break;

		case VCHR:
			d.d_type = DT_CHR;
			break;

		case VDIR:
			d.d_type = DT_DIR;
			break;

		case VFIFO:
			d.d_type = DT_FIFO;
			break;

		case VLNK:
			d.d_type = DT_LNK;
			break;

		case VREG:
			d.d_type = DT_REG;
			break;

		case VSOCK:
			d.d_type = DT_SOCK;
			break;

		default:
			KASSERT(0);
		}
		/* tmpfs names are stored without a terminator; append one
		 * for the dirent copied out to the caller. */
		d.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
		d.d_name[de->td_namelen] = '\0';
		d.d_reclen = _DIRENT_SIZE(&d);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned.
		 * -1 is the special "does not fit" indicator. */
		if (d.d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(&d, d.d_reclen, uio);

		cnt++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset in the uio structure to be correctly aligned
	 * with tmpfs_dirent structures.  Otherwise, the offset is the
	 * size of the returned dirent structures, which is useless for us. */
	uio->uio_offset = (cnt + 2) * sizeof(struct tmpfs_dirent);

	/* Cache the current status so a sequential readdir can resume
	 * without rescanning the list from the start. */
	if (de == NULL) {
		KASSERT(cnt == node->tn_size / sizeof(struct tmpfs_dirent));
		node->tn_readdir_lastn = 0;
		node->tn_readdir_lastp = NULL;
	} else {
		node->tn_readdir_lastn = cnt;
		node->tn_readdir_lastp = de;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}
725
726 /* --------------------------------------------------------------------- */
727
728 int
729 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
730 {
731 int error;
732 size_t newpages, oldpages;
733 struct tmpfs_mount *tmp;
734 struct tmpfs_node *node;
735
736 KASSERT(vp->v_type == VREG);
737 KASSERT(newsize >= 0);
738 KASSERT(newsize != vp->v_size);
739
740 node = VP_TO_TMPFS_NODE(vp);
741 tmp = VFS_TO_TMPFS(vp->v_mount);
742
743 /* Convert the old and new sizes to the number of pages needed to
744 * store them. It may happen that we do not need to do anything
745 * because the last allocated page can accommodate the change on
746 * its own. */
747 oldpages = round_page(node->tn_size) / PAGE_SIZE;
748 KASSERT(oldpages == node->tn_aobj_pages);
749 newpages = round_page(newsize) / PAGE_SIZE;
750
751 if (newpages > oldpages &&
752 newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
753 error = ENOSPC;
754 goto out;
755 }
756
757 if (newpages == 0) {
758 uao_detach(node->tn_aobj);
759 node->tn_aobj = NULL;
760 node->tn_aobj_pages = 0;
761 node->tn_va = 0;
762 } else if (newpages > oldpages) {
763 vaddr_t va;
764 struct uvm_object *aobj;
765
766 aobj = uao_create(newpages * PAGE_SIZE, 0);
767 va = vm_map_min(kernel_map);
768 error = uvm_map(kernel_map, &va, newpages * PAGE_SIZE,
769 aobj, 0, 0,
770 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
771 UVM_ADV_RANDOM, 0));
772 if (error != 0) {
773 uao_detach(aobj);
774 error = ENOSPC;
775 goto out;
776 }
777
778 /* XXX This is really expensive. Is it possible to do a
779 * map entry passing? */
780 if (node->tn_size > 0) {
781 KASSERT(node->tn_aobj != NULL);
782 (void)memcpy((void *)va, (void *)node->tn_va,
783 node->tn_size);
784 uao_detach(node->tn_aobj);
785 }
786
787 node->tn_aobj = aobj;
788 node->tn_aobj_pages = newpages;
789 node->tn_va = va;
790 } else if (newpages < oldpages) {
791 /* XXX Do we need to shrink the aobj or is the unmap enough? */
792 uvm_unmap(kernel_map, node->tn_va + (vaddr_t)newpages,
793 (vaddr_t)node->tn_aobj_pages * PAGE_SIZE);
794 node->tn_aobj_pages = newpages;
795 }
796
797 tmp->tm_pages_used += (newpages - oldpages);
798 node->tn_size = newsize;
799 uvm_vnp_setsize(vp, newsize);
800
801 error = 0;
802
803 out:
804 return error;
805 }
806
807 /* --------------------------------------------------------------------- */
808
809 /* Returns information about the number of available memory pages,
810 * including physical and virtual ones.
811 *
812 * If 'total' is TRUE, the value returned is the total amount of memory
813 * pages configured for the system (either in use or free).
814 * If it is FALSE, the value returned is the amount of free memory pages.
815 *
816 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
817 * excessive memory usage.
818 *
819 * XXX: This function is used every time TMPFS_PAGES_MAX is called to gather
820 * the amount of free memory, something that happens during _each_
821 * object allocation. The time it takes to run this function so many
822 * times is not negligible, so this value should be stored as an
823 * aggregate somewhere, possibly within UVM (we cannot do it ourselves
824 * because we can't get notifications on memory usage changes). */
825 size_t
826 tmpfs_mem_info(boolean_t total)
827 {
828 int i, sec;
829 register_t retval;
830 size_t size;
831 struct swapent *sep;
832
833 sec = uvmexp.nswapdev;
834 sep = (struct swapent *)malloc(sizeof(struct swapent) * sec, M_TEMP,
835 M_WAITOK);
836 KASSERT(sep != NULL);
837 uvm_swap_stats(SWAP_STATS, sep, sec, &retval);
838 KASSERT(retval == sec);
839
840 size = 0;
841 if (total) {
842 for (i = 0; i < sec; i++)
843 size += dbtob(sep[i].se_nblks) / PAGE_SIZE;
844 } else {
845 for (i = 0; i < sec; i++)
846 size += dbtob(sep[i].se_nblks - sep[i].se_inuse) /
847 PAGE_SIZE;
848 }
849 size += uvmexp.free;
850
851 free(sep, M_TEMP);
852
853 return size;
854 }
855
856 /* --------------------------------------------------------------------- */
857
858 /* Change flags of the given vnode.
859 * Caller should execute VOP_UPDATE on vp after a successful execution.
860 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Only the file's owner or the super-user may change the flags. */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)))
		return error;
	if (cred->cr_uid == 0) {
		/* The super-user is only allowed to change flags if the file
		 * wasn't protected before and the securelevel is zero. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
		    securelevel > 0)
			return EPERM;
		node->tn_flags = flags;
	} else {
		/* Regular users can change flags provided they only want to
		 * change user-specific ones, not those reserved for the
		 * super-user. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
		    (flags & UF_SETTABLE) != flags)
			return EPERM;
		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
			return EPERM;
		/* Keep the super-user flags untouched and replace only the
		 * user-settable set. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
908
909 /* --------------------------------------------------------------------- */
910
911 /* Change access mode on the given vnode.
912 * Caller should execute VOP_UPDATE on vp after a successful execution.
913 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Only the file's owner or the super-user may change the mode. */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)))
		return error;
	if (cred->cr_uid != 0) {
		/* Non-super-users may not set the sticky bit on anything
		 * but directories... */
		if (vp->v_type != VDIR && (mode & S_ISTXT))
			return EFTYPE;

		/* ...nor the set-gid bit for groups they do not belong to. */
		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
			return EPERM;
	}

	node->tn_mode = (mode & ALLPERMS);

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
955
956 /* --------------------------------------------------------------------- */
957
958 /* Change ownership of the given vnode. At least one of uid or gid must
959 * be different than VNOVAL. If one is set to that value, the attribute
960 * is unchanged.
961 * Caller should execute VOP_UPDATE on vp after a successful execution.
962 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	KASSERT(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Super-user credentials are required unless the caller owns the
	 * file, leaves the owner unchanged, and either leaves the group
	 * unchanged or moves it to a group the caller belongs to. */
	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
	    (gid != node->tn_gid && !(cred->cr_gid == node->tn_gid ||
	    groupmember(gid, cred)))) &&
	    ((error = suser(cred, &p->p_acflag)) != 0))
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
1009
1010 /* --------------------------------------------------------------------- */
1011
1012 /* Change size of the given vnode.
1013 * Caller should execute VOP_UPDATE on vp after a successful execution.
1014 * The vnode must be locked on entry and remain locked on exit. */
1015 int
1016 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
1017 struct proc *p)
1018 {
1019 int error;
1020 struct tmpfs_node *node;
1021
1022 KASSERT(VOP_ISLOCKED(vp));
1023
1024 node = VP_TO_TMPFS_NODE(vp);
1025
1026 /* Decide whether this is a valid operation based on the file type. */
1027 error = 0;
1028 switch (vp->v_type) {
1029 case VDIR:
1030 return EISDIR;
1031
1032 case VLNK:
1033 /* FALLTHROUGH */
1034 case VREG:
1035 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1036 return EROFS;
1037 break;
1038
1039 case VBLK:
1040 /* FALLTHROUGH */
1041 case VCHR:
1042 /* FALLTHROUGH */
1043 case VSOCK:
1044 /* FALLTHROUGH */
1045 case VFIFO:
1046 /* Allow modifications of special files even if in the file
1047 * system is mounted read-only (we are not modifying the
1048 * files themselves, but the objects they represent). */
1049 break;
1050
1051 default:
1052 /* Anything else is unsupported. */
1053 return EINVAL;
1054 }
1055
1056 /* Immutable or append-only files cannot be modified, either. */
1057 if (node->tn_flags & (IMMUTABLE | APPEND))
1058 return EPERM;
1059
1060 error = VOP_TRUNCATE(vp, size, 0, cred, p);
1061 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
1062 * for us, as will update tn_status; no need to do that here. */
1063
1064 KASSERT(VOP_ISLOCKED(vp));
1065
1066 return error;
1067 }
1068
1069 /* --------------------------------------------------------------------- */
1070
1071 /* Change access and modification times of the given vnode.
1072 * Caller should execute VOP_UPDATE on vp after a successful execution.
1073 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
    int vaflags, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	/* Non-owners need super-user credentials, except that a utimes(2)
	 * call with a NULL timestamp (VA_UTIMES_NULL) is also permitted
	 * with plain write access to the file. */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)) &&
	    ((vaflags & VA_UTIMES_NULL) == 0 ||
	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
		return error;

	/* A timestamp is considered "set" only when both of its fields
	 * differ from VNOVAL; unset timestamps are left untouched. */
	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	/* VOP_UPDATE commits the requested times to the node. */
	error = VOP_UPDATE(vp, atime, mtime, 0);

	KASSERT(VOP_ISLOCKED(vp));

	return error;
}
1114