/*	$NetBSD: tmpfs_subr.c,v 1.2 2005/09/10 22:28:57 jmmv Exp $	*/

/*
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.2 2005/09/10 22:28:57 jmmv Exp $");

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/swap.h>
#include <sys/vnode.h>

#include <uvm/uvm.h>

#include <miscfs/specfs/specdev.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

/* --------------------------------------------------------------------- */

int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
{
	struct timeval tv;
	struct tmpfs_node *nnode;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	KASSERT(IFF(type == VLNK, target != NULL));
	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	nnode = NULL;
	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
			return ENOSPC;

		nnode =
		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
		if (nnode == NULL)
			return ENOSPC;
		nnode->tn_id = tmp->tm_nodes_last++;
		nnode->tn_gen = 0;
	} else {
		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
		LIST_REMOVE(nnode, tn_entries);
		nnode->tn_gen++;
	}
	KASSERT(nnode != NULL);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_links = 0;
	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, &nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_vnode = NULL;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_rdev = rdev;
		break;

	case VDIR:
		TAILQ_INIT(&nnode->tn_dir);
		nnode->tn_parent = (parent == NULL) ? nnode : parent;
		nnode->tn_readdir_lastn = 0;
		nnode->tn_readdir_lastp = NULL;
		nnode->tn_links++;
		nnode->tn_parent->tn_links++;
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		KASSERT(strlen(target) < MAXPATHLEN);
		nnode->tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool,
		    strlen(target), 0);
		if (nnode->tn_link == NULL) {
			nnode->tn_type = VNON;
			tmpfs_free_node(tmp, nnode);
			return ENOSPC;
		}
		strcpy(nnode->tn_link, target);
		nnode->tn_size = strlen(target);
		break;

	case VREG:
		nnode->tn_aobj = NULL;
		nnode->tn_aobj_pages = 0;
		nnode->tn_va = 0;
		break;

	default:
		KASSERT(0);
	}

	*node = nnode;
	return 0;
}
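
/* Example (editor's sketch, not part of the original source): a minimal
 * mount-time allocation of the root directory, matching the KASSERT in
 * tmpfs_alloc_node() that ties a NULL parent to type VDIR.  The
 * tmpfs_example_* name is hypothetical; the real call site lives in the
 * mount code. */
#if 0
static int
tmpfs_example_alloc_root(struct tmpfs_mount *tmp, struct proc *p)
{
	struct tmpfs_node *root;
	int error;

	/* parent == NULL and type == VDIR are only legal for the root;
	 * no symlink target and no device number are needed. */
	error = tmpfs_alloc_node(tmp, VDIR, 0 /* uid */, 0 /* gid */,
	    S_IRWXU | S_IRWXG | S_IRWXO, NULL, NULL, VNOVAL, p, &root);
	if (error == 0)
		tmp->tm_root = root;
	return error;
}
#endif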

/* --------------------------------------------------------------------- */

void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{
	ino_t id;
	unsigned long gen;
	size_t pages;

	switch (node->tn_type) {
	case VNON:
		/* Do not do anything.  VNON is provided to let the
		 * allocation routine clean itself easily by avoiding
		 * duplicating code in it. */
		/* FALLTHROUGH */
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VDIR:
		/* FALLTHROUGH */
	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		pages = 0;
		break;

	case VLNK:
		tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_link,
		    strlen(node->tn_link));
		pages = 0;
		break;

	case VREG:
		if (node->tn_aobj != NULL)
			uao_detach(node->tn_aobj);
		pages = node->tn_aobj_pages;
		break;

	default:
		KASSERT(0);
		pages = 0; /* Shut up gcc when !DIAGNOSTIC. */
		break;
	}

	tmp->tm_pages_used -= pages;

	LIST_REMOVE(node, tn_entries);
	id = node->tn_id;
	gen = node->tn_gen;
	memset(node, 0, sizeof(struct tmpfs_node));
	node->tn_id = id;
	node->tn_type = VNON;
	node->tn_gen = gen;
	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
}

/* --------------------------------------------------------------------- */

int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, uint16_t len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
	if (nde->td_name == NULL) {
		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_node = node;

	node->tn_links++;
	*de = nde;

	return 0;
}

/* --------------------------------------------------------------------- */

void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    boolean_t node_exists)
{
	if (node_exists) {
		struct tmpfs_node *node;

		node = de->td_node;

		KASSERT(node->tn_links > 0);
		node->tn_links--;
	}

	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
}

/* --------------------------------------------------------------------- */

int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *nvp;
	struct vnode *vp;

	vp = NULL;

	if (node->tn_vnode != NULL) {
		vp = node->tn_vnode;
		vget(vp, LK_EXCLUSIVE | LK_RETRY);
		error = 0;
		goto out;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0)
		goto out;
	KASSERT(vp != NULL);

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		vp->v_data = NULL;
		ungetnewvnode(vp);
		vp = NULL;
		goto out;
	}

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		nvp = checkalias(vp, node->tn_rdev, mp);
		if (nvp != NULL) {
			/* Discard unneeded vnode, but save its inode. */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;

			/* XXX spec_vnodeops has no locking, so we have to
			 * do it explicitly. */
			VOP_UNLOCK(vp, 0);
			vp->v_op = spec_vnodeop_p;
			vp->v_flag &= ~VLOCKSWORK;
			vrele(vp);
			vgone(vp);

			/* Reinitialize aliased node. */
			vp = nvp;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			if (error != 0) {
				vp->v_data = NULL;
				vp = NULL;
				goto out;
			}
		}
		break;

	case VDIR:
		vp->v_flag = node->tn_parent == node ? VROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);

	error = 0;

out:
	*vpp = node->tn_vnode = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}
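
/* Example (editor's sketch, not part of the original source): the calling
 * convention of tmpfs_alloc_vp().  On success the returned vnode is
 * referenced and locked (see the KASSERT above), so callers release it
 * with vput().  The function name is hypothetical. */
#if 0
static int
tmpfs_example_get_vnode(struct mount *mp, struct tmpfs_node *node)
{
	struct vnode *vp;
	int error;

	error = tmpfs_alloc_vp(mp, node, &vp);
	if (error != 0)
		return error;

	/* ... operate on the locked vnode ... */

	vput(vp);		/* Unlock and drop the reference. */
	return 0;
}
#endif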

/* --------------------------------------------------------------------- */

void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	node->tn_vnode = NULL;
	vp->v_data = NULL;
}

/* --------------------------------------------------------------------- */

/* Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code. */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, the parent will
	 * gain a new link (the new directory's '..' entry), so make sure
	 * its link count cannot overflow. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}

		parent = dnode;
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
	    cnp->cn_proc, &node);
	if (error != 0)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error != 0) {
		tmpfs_free_dirent(tmp, de, TRUE);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	tmpfs_dir_attach(dvp, de);
	VN_KNOTE(dvp, NOTE_WRITE);

out:
	if (error != 0 || !(cnp->cn_flags & SAVESTART))
		PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);

	KASSERT(!VOP_ISLOCKED(dvp));
	KASSERT(IFF(error == 0, *vpp != NULL));

	return error;
}
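
/* Example (editor's sketch, not part of the original source): how a vnode
 * operation such as VOP_CREATE can be implemented on top of
 * tmpfs_alloc_file().  Regular files carry no symlink target, so NULL is
 * passed for it; the argument structure follows the usual vop_create_args
 * layout, but this exact wrapper is hypothetical. */
#if 0
static int
tmpfs_example_create(void *v)
{
	struct vop_create_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;

	return tmpfs_alloc_file(ap->a_dvp, ap->a_vpp, ap->a_vap,
	    ap->a_cnp, NULL);
}
#endif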

/* --------------------------------------------------------------------- */

void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	dnode = VP_TO_TMPFS_DIR(vp);

	TAILQ_INSERT_TAIL(&dnode->tn_dir, de, td_entries);
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);
}

/* --------------------------------------------------------------------- */

void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	dnode = VP_TO_TMPFS_DIR(vp);

	TAILQ_REMOVE(&dnode->tn_dir, de, td_entries);
	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);
}

/* --------------------------------------------------------------------- */

struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
{
	boolean_t found;
	struct tmpfs_dirent *de;

	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')));
	TMPFS_VALIDATE_DIR(node);

	node->tn_status |= TMPFS_NODE_ACCESSED;

	found = FALSE;
	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
		KASSERT(cnp->cn_namelen < 0xffff);
		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
			found = TRUE;
			break;
		}
	}

	return found ? de : NULL;
}
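
/* Example (editor's sketch, not part of the original source): the KASSERTs
 * above forbid passing "." or ".." to tmpfs_dir_lookup(), so a caller
 * resolves those names itself, e.g. as follows.  The helper name is
 * hypothetical. */
#if 0
static struct tmpfs_node *
tmpfs_example_lookup(struct tmpfs_node *dnode, struct componentname *cnp)
{
	struct tmpfs_dirent *de;

	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
		return dnode;		/* "." is the directory itself. */
	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')
		return dnode->tn_parent;	/* ".." is its parent. */

	de = tmpfs_dir_lookup(dnode, cnp);
	return de == NULL ? NULL : de->td_node;
}
#endif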

/* --------------------------------------------------------------------- */

/* Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry, or an appropriate error code if another
 * error happens. */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == 0);

	dent.d_fileno = node->tn_id;
	dent.d_type = DT_DIR;
	dent.d_namlen = 1;
	dent.d_name[0] = '.';
	dent.d_name[1] = '\0';
	dent.d_reclen = _DIRENT_SIZE(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error == 0)
			uio->uio_offset += sizeof(struct tmpfs_dirent) -
			    dent.d_reclen;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}

/* --------------------------------------------------------------------- */

/* Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry, or an appropriate error code if another
 * error happens. */
int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == sizeof(struct tmpfs_dirent));

	/* The '..' entry reports the parent directory's inode number (for
	 * the root directory, tn_parent points back to the node itself). */
	dent.d_fileno = node->tn_parent->tn_id;
	dent.d_type = DT_DIR;
	dent.d_namlen = 2;
	dent.d_name[0] = '.';
	dent.d_name[1] = '.';
	dent.d_name[2] = '\0';
	dent.d_reclen = _DIRENT_SIZE(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error == 0)
			uio->uio_offset += sizeof(struct tmpfs_dirent) -
			    dent.d_reclen;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}

/* --------------------------------------------------------------------- */

/* Helper function for tmpfs_readdir.  Returns as many directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, -1 if there was not enough space
 * in the uio structure to hold the directory entry, or an appropriate
 * error code if another error happens. */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	long cnt, startcnt;
	struct tmpfs_dirent *de;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset % sizeof(struct tmpfs_dirent) == 0);
	KASSERT(uio->uio_offset >= sizeof(struct tmpfs_dirent) * 2);
	KASSERT(uio->uio_offset < node->tn_size +
	    sizeof(struct tmpfs_dirent) * 2);

	/* Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry. */
	de = NULL;
	startcnt = uio->uio_offset / sizeof(struct tmpfs_dirent) - 2;
	if (startcnt == node->tn_readdir_lastn &&
	    node->tn_readdir_lastp != NULL) {
		cnt = node->tn_readdir_lastn;
		de = node->tn_readdir_lastp;
	} else {
		cnt = 0;
		de = TAILQ_FIRST(&node->tn_dir);
		while (cnt < startcnt) {
			cnt++;
			de = TAILQ_NEXT(de, td_entries);

			/* Ensure that if we have not found the desired item,
			 * there are more entries in the directory to continue
			 * the search. */
			KASSERT(IMPLIES(de == TAILQ_LAST(&node->tn_dir,
			    tmpfs_dir), cnt == startcnt));
		}
	}
	KASSERT(cnt == startcnt);
	KASSERT(de != NULL);

	/* Read as many entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		struct dirent d;

		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		d.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			d.d_type = DT_BLK;
			break;

		case VCHR:
			d.d_type = DT_CHR;
			break;

		case VDIR:
			d.d_type = DT_DIR;
			break;

		case VFIFO:
			d.d_type = DT_FIFO;
			break;

		case VLNK:
			d.d_type = DT_LNK;
			break;

		case VREG:
			d.d_type = DT_REG;
			break;

		case VSOCK:
			d.d_type = DT_SOCK;
			break;

		default:
			KASSERT(0);
		}
		d.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
		d.d_name[de->td_namelen] = '\0';
		d.d_reclen = _DIRENT_SIZE(&d);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (d.d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(&d, d.d_reclen, uio);

		cnt++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset in the uio structure to be correctly aligned
	 * with tmpfs_dirent structures.  Otherwise, the offset is the
	 * size of the returned dirent structures, which is useless for us. */
	uio->uio_offset = (cnt + 2) * sizeof(struct tmpfs_dirent);

	/* Cache the current status. */
	if (de == NULL) {
		KASSERT(cnt == node->tn_size / sizeof(struct tmpfs_dirent));
		node->tn_readdir_lastn = 0;
		node->tn_readdir_lastp = NULL;
	} else {
		node->tn_readdir_lastn = cnt;
		node->tn_readdir_lastp = de;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	return error;
}
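
/* Example (editor's sketch, not part of the original source): how a
 * readdir implementation can stitch the three helpers together.  The
 * directory offset is counted in units of sizeof(struct tmpfs_dirent):
 * slot 0 is '.', slot 1 is '..' and slots 2..n are the real entries,
 * which is what the KASSERTs in the helpers above enforce.  The -1
 * return value only means "out of uio space" and is not a real error. */
#if 0
static int
tmpfs_example_readdir(struct tmpfs_node *node, struct uio *uio)
{
	int error = 0;

	if (uio->uio_offset == 0)
		error = tmpfs_dir_getdotdent(node, uio);
	if (error == 0 && uio->uio_offset == sizeof(struct tmpfs_dirent))
		error = tmpfs_dir_getdotdotdent(node, uio);
	if (error == 0 && uio->uio_resid > 0 &&
	    uio->uio_offset < node->tn_size +
	    sizeof(struct tmpfs_dirent) * 2)
		error = tmpfs_dir_getdents(node, uio);
	if (error == -1)
		error = 0;
	return error;
}
#endif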

/* --------------------------------------------------------------------- */

int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	int error;
	size_t newpages, oldpages;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);
	KASSERT(newsize != vp->v_size);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/* Convert the old and new sizes to the number of pages needed to
	 * store them.  It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own. */
	oldpages = round_page(node->tn_size) / PAGE_SIZE;
	KASSERT(oldpages == node->tn_aobj_pages);
	newpages = round_page(newsize) / PAGE_SIZE;

	if (newpages > oldpages &&
	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
		error = ENOSPC;
		goto out;
	}

	if (newpages == 0) {
		uao_detach(node->tn_aobj);
		node->tn_aobj = NULL;
		node->tn_aobj_pages = 0;
		node->tn_va = 0;
	} else if (newpages > oldpages) {
		vaddr_t va;
		struct uvm_object *aobj;

		aobj = uao_create(newpages * PAGE_SIZE, 0);
		va = vm_map_min(kernel_map);
		error = uvm_map(kernel_map, &va, newpages * PAGE_SIZE,
		    aobj, 0, 0,
		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
		    UVM_ADV_RANDOM, 0));
		if (error != 0) {
			uao_detach(aobj);
			error = ENOSPC;
			goto out;
		}

		/* XXX This is really expensive.  Is it possible to do a
		 * map entry passing? */
		if (node->tn_size > 0) {
			KASSERT(node->tn_aobj != NULL);
			(void)memcpy((void *)va, (void *)node->tn_va,
			    node->tn_size);
			uao_detach(node->tn_aobj);
		}

		node->tn_aobj = aobj;
		node->tn_aobj_pages = newpages;
		node->tn_va = va;
	} else if (newpages < oldpages) {
		/* XXX Do we need to shrink the aobj or is the unmap enough? */
		uvm_unmap(kernel_map, node->tn_va + newpages * PAGE_SIZE,
		    node->tn_va + oldpages * PAGE_SIZE);
		node->tn_aobj_pages = newpages;
	}

	tmp->tm_pages_used += (newpages - oldpages);
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);

	error = 0;

out:
	return error;
}
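
/* Worked example (editor's note, not part of the original source) of the
 * page accounting above, assuming 4096-byte pages: growing a file from
 * 1000 to 5000 bytes goes from round_page(1000) / PAGE_SIZE == 1 page to
 * round_page(5000) / PAGE_SIZE == 2 pages, so exactly one extra page must
 * be available; growing it from 1000 to 2000 bytes keeps newpages ==
 * oldpages and only the size bookkeeping changes.  A truncate-style
 * caller (hypothetical name) filters the unchanged-size case that the
 * KASSERT above rejects: */
#if 0
static int
tmpfs_example_truncate(struct vnode *vp, off_t length)
{
	if (vp->v_size == length)
		return 0;
	return tmpfs_reg_resize(vp, length);
}
#endif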

/* --------------------------------------------------------------------- */

/* Returns information about the number of available memory pages,
 * including physical and virtual ones.
 *
 * If 'total' is TRUE, the value returned is the total amount of memory
 * pages configured for the system (either in use or free).
 * If it is FALSE, the value returned is the amount of free memory pages.
 *
 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
 * excessive memory usage.
 *
 * XXX: This function is used every time TMPFS_PAGES_MAX is called to gather
 *      the amount of free memory, something that happens during _each_
 *      object allocation.  The time it takes to run this function so many
 *      times is not negligible, so this value should be stored as an
 *      aggregate somewhere, possibly within UVM (we cannot do it ourselves
 *      because we can't get notifications on memory usage changes). */
size_t
tmpfs_mem_info(boolean_t total)
{
	int i, sec;
	register_t retval;
	size_t size;
	struct swapent *sep;

	sec = uvmexp.nswapdev;
	sep = (struct swapent *)malloc(sizeof(struct swapent) * sec, M_TEMP,
	    M_WAITOK);
	KASSERT(sep != NULL);
	uvm_swap_stats(SWAP_STATS, sep, sec, &retval);
	KASSERT(retval == sec);

	size = 0;
	if (total) {
		for (i = 0; i < sec; i++)
			size += dbtob(sep[i].se_nblks) / PAGE_SIZE;
	} else {
		for (i = 0; i < sec; i++)
			size += dbtob(sep[i].se_nblks - sep[i].se_inuse) /
			    PAGE_SIZE;
	}
	size += uvmexp.free;

	free(sep, M_TEMP);

	return size;
}
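
/* Example (editor's sketch, not part of the original source): one way a
 * limit such as TMPFS_PAGES_AVAIL could be derived from tmpfs_mem_info().
 * The real macro lives in tmpfs.h and may differ; tm_pages_max is assumed
 * to be the mount-time page limit.  TMPFS_PAGES_RESERVED is subtracted as
 * the comment above recommends. */
#if 0
static size_t
tmpfs_example_pages_avail(struct tmpfs_mount *tmp)
{
	size_t freepages;

	freepages = tmpfs_mem_info(FALSE);
	if (freepages < TMPFS_PAGES_RESERVED)
		freepages = 0;
	else
		freepages -= TMPFS_PAGES_RESERVED;

	return MIN(tmp->tm_pages_max - tmp->tm_pages_used, freepages);
}
#endif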

/* --------------------------------------------------------------------- */

/* Change flags of the given vnode.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)))
		return error;
	if (cred->cr_uid == 0) {
		/* The super-user is only allowed to change flags if the file
		 * wasn't protected before and the securelevel is zero. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
		    securelevel > 0)
			return EPERM;
		node->tn_flags = flags;
	} else {
		/* Regular users can change flags provided they only want to
		 * change user-specific ones, not those reserved for the
		 * super-user. */
		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
		    (flags & UF_SETTABLE) != flags)
			return EPERM;
		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
			return EPERM;
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/* Change access mode on the given vnode.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)))
		return error;
	if (cred->cr_uid != 0) {
		if (vp->v_type != VDIR && (mode & S_ISTXT))
			return EFTYPE;

		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
			return EPERM;
	}

	node->tn_mode = (mode & ALLPERMS);

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/* Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	KASSERT(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
	    (gid != node->tn_gid && !(cred->cr_gid == node->tn_gid ||
	    groupmember(gid, cred)))) &&
	    ((error = suser(cred, &p->p_acflag)) != 0))
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}

/* --------------------------------------------------------------------- */

/* Change size of the given vnode.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		break;

	default:
		/* Anything else is unsupported. */
		return EINVAL;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = VOP_TRUNCATE(vp, size, 0, cred, p);
	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, and will update tn_status as well; no need to do that
	 * here. */

	KASSERT(VOP_ISLOCKED(vp));

	return error;
}

/* --------------------------------------------------------------------- */

/* Change access and modification times of the given vnode.
 * Caller should execute VOP_UPDATE on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit. */
int
tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
    int vaflags, struct ucred *cred, struct proc *p)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/* XXX: The following comes from UFS code, and can be found in
	 * several other file systems.  Shouldn't this be centralized
	 * somewhere? */
	if (cred->cr_uid != node->tn_uid &&
	    (error = suser(cred, &p->p_acflag)) &&
	    ((vaflags & VA_UTIMES_NULL) == 0 ||
	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
		return error;

	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	error = VOP_UPDATE(vp, atime, mtime, 0);

	KASSERT(VOP_ISLOCKED(vp));

	return error;
}
