/*	$NetBSD: tmpfs_subr.c,v 1.117 2023/04/29 08:15:13 riastradh Exp $	*/

/*
 * Copyright (c) 2005-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system: interfaces for inode and directory entry
 * construction, destruction and manipulation.
 *
 * Reference counting
 *
 *	The link count of an inode (tmpfs_node_t::tn_links) is used as a
 *	reference counter.  However, it has slightly different semantics.
 *
 *	For directories - the link count represents the directory entries
 *	which refer to the directory.  In other words, it represents the
 *	count of sub-directories.  It also takes into account the virtual
 *	'.' entry (which has no real entry in the list).  For files - the
 *	link count represents the hard links.  Since only empty directories
 *	can be removed, the link count is sufficient for the reference
 *	counting needs of the file system.  Note: to check whether a
 *	directory is empty, the inode size (tmpfs_node_t::tn_size) can be
 *	used.
 *
 *	The inode itself, as an object, gathers its first reference when a
 *	directory entry is attached via tmpfs_dir_attach(9).  For instance,
 *	after a regular tmpfs_create(), a file would have a link count of 1,
 *	while a directory after tmpfs_mkdir() would have 2 (due to '.').
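 *
 *	To illustrate (the numbers follow from tmpfs_dir_attach() below,
 *	they are not an extra rule): after "mkdir /d; mkdir /d/s" on a
 *	tmpfs mount, /d has a link count of 3 - the virtual '.', the
 *	entry in its parent and the sub-directory 's' - while a plain
 *	file created with tmpfs_create() starts at 1.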
 *
 * Reclamation
 *
 *	It should be noted that tmpfs inodes rely on a combination of vnode
 *	reference counting and link counting.  That is, an inode can only be
 *	destroyed if its associated vnode is inactive.  The destruction is
 *	done on vnode reclamation i.e. tmpfs_reclaim().  It should be noted
 *	that tmpfs_node_t::tn_links being 0 is a destruction criterion.
 *
 *	If an inode has references within the file system (tn_links > 0) and
 *	its inactive vnode gets reclaimed/recycled - then the association is
 *	broken in tmpfs_reclaim().  In such a case, an inode will always pass
 *	tmpfs_lookup() and thus vcache_get() to associate a new vnode.
 *
 * Lock order
 *
 *	vnode_t::v_vlock ->
 *		vnode_t::v_interlock
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.117 2023/04/29 08:15:13 riastradh Exp $");

#include <sys/param.h>
#include <sys/cprng.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/atomic.h>

#include <uvm/uvm_aobj.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>

static void	tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *);

/*
 * Initialize vnode with tmpfs node.
 */
static void
tmpfs_init_vnode(struct vnode *vp, tmpfs_node_t *node)
{
	krwlock_t *slock;

	KASSERT(node->tn_vnode == NULL);

	/* Share the VM object lock with the node. */
	if (node->tn_type == VREG) {
		slock = node->tn_spec.tn_reg.tn_aobj->vmobjlock;
		rw_obj_hold(slock);
		uvm_obj_setlock(&vp->v_uobj, slock);
	}

	vp->v_tag = VT_TMPFS;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (vp->v_type) {
	case VBLK:
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
		break;
	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;
	case VDIR:
		if (node->tn_spec.tn_dir.tn_parent == node)
			vp->v_vflag |= VV_ROOT;
		/* FALLTHROUGH */
	case VLNK:
	case VREG:
	case VSOCK:
		vp->v_op = tmpfs_vnodeop_p;
		break;
	default:
		panic("bad node type %d", vp->v_type);
		break;
	}

	vp->v_data = node;
	node->tn_vnode = vp;
	uvm_vnp_setsize(vp, node->tn_size);
	KASSERT(node->tn_mode != VNOVAL);
	cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid, true);
}

/*
 * tmpfs_loadvnode: initialise a vnode for a specified inode.
 */
int
tmpfs_loadvnode(struct mount *mp, struct vnode *vp,
    const void *key, size_t key_len, const void **new_key)
{
	tmpfs_node_t *node;

	KASSERT(key_len == sizeof(node));
	memcpy(&node, key, key_len);

	if (node->tn_links == 0)
		return ENOENT;

	tmpfs_init_vnode(vp, node);

	*new_key = &vp->v_data;

	return 0;
}
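/*
 * Illustrative sketch (an assumed caller, not part of this file): the
 * vnode cache key for tmpfs is simply the inode pointer, so a caller
 * that already knows the node can do:
 *
 *	struct vnode *vp;
 *	int error = vcache_get(mp, &node, sizeof(node), &vp);
 *
 * vcache_get(9) invokes the tmpfs_loadvnode() above only if no vnode is
 * currently cached for that key.
 */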
/*
 * tmpfs_newvnode: allocate a new inode of a specified type and
 * attach the vnode.
 */
int
tmpfs_newvnode(struct mount *mp, struct vnode *dvp, struct vnode *vp,
    struct vattr *vap, kauth_cred_t cred, void *extra,
    size_t *key_len, const void **new_key)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp);
	tmpfs_node_t *node, *dnode;

	if (dvp != NULL) {
		KASSERT(VOP_ISLOCKED(dvp));
		dnode = VP_TO_TMPFS_DIR(dvp);
		if (dnode->tn_links == 0)
			return ENOENT;
		if (vap->va_type == VDIR) {
			/* Check for maximum links limit. */
			if (dnode->tn_links == LINK_MAX)
				return EMLINK;
			KASSERT(dnode->tn_links < LINK_MAX);
		}
	} else
		dnode = NULL;

	node = tmpfs_node_get(tmp);
	if (node == NULL)
		return ENOSPC;

	/* Initially, no references and no associations. */
	node->tn_links = 0;
	node->tn_vnode = NULL;
	node->tn_holdcount = 0;
	node->tn_dirent_hint = NULL;

	/*
	 * XXX Where the pool is backed by a map larger than (4GB *
	 * sizeof(*node)), this may produce duplicate inode numbers
	 * for applications that do not understand 64-bit ino_t.
	 */
	node->tn_id = (ino_t)((uintptr_t)node / sizeof(*node));
	/*
	 * Make sure the generation number is not zero.
	 * tmpfs_inactive() uses generation zero to mark dead nodes.
	 */
	do {
		node->tn_gen = TMPFS_NODE_GEN_MASK & cprng_fast32();
	} while (node->tn_gen == 0);

	/* Generic initialization. */
	KASSERT((int)vap->va_type != VNOVAL);
	node->tn_type = vap->va_type;
	node->tn_size = 0;
	node->tn_flags = 0;
	node->tn_lockf = NULL;

	node->tn_tflags = 0;
	vfs_timestamp(&node->tn_atime);
	node->tn_birthtime = node->tn_atime;
	node->tn_ctime = node->tn_atime;
	node->tn_mtime = node->tn_atime;
	mutex_init(&node->tn_timelock, MUTEX_DEFAULT, IPL_NONE);

	if (dvp == NULL) {
		KASSERT(vap->va_uid != VNOVAL && vap->va_gid != VNOVAL);
		node->tn_uid = vap->va_uid;
		node->tn_gid = vap->va_gid;
		vp->v_vflag |= VV_ROOT;
	} else {
		KASSERT(dnode != NULL);
		node->tn_uid = kauth_cred_geteuid(cred);
		node->tn_gid = dnode->tn_gid;
	}
	KASSERT(vap->va_mode != VNOVAL);
	node->tn_mode = vap->va_mode;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		/* Character/block special device. */
		KASSERT(vap->va_rdev != VNOVAL);
		node->tn_spec.tn_dev.tn_rdev = vap->va_rdev;
		break;
	case VDIR:
		/* Directory. */
		TAILQ_INIT(&node->tn_spec.tn_dir.tn_dir);
		node->tn_spec.tn_dir.tn_parent = NULL;
		node->tn_spec.tn_dir.tn_seq_arena = NULL;
		node->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;

		/* Extra link count for the virtual '.' entry. */
		node->tn_links++;
		break;
	case VFIFO:
	case VSOCK:
		break;
	case VLNK:
		node->tn_size = 0;
		node->tn_spec.tn_lnk.tn_link = NULL;
		break;
	case VREG:
		/* Regular file.  Create an underlying UVM object. */
		node->tn_spec.tn_reg.tn_aobj =
		    uao_create(INT64_MAX - PAGE_SIZE, 0);
		node->tn_spec.tn_reg.tn_aobj_pages = 0;
		break;
	default:
		panic("bad node type %d", vp->v_type);
		break;
	}

	tmpfs_init_vnode(vp, node);

	mutex_enter(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, node, tn_entries);
	mutex_exit(&tmp->tm_lock);

	*key_len = sizeof(vp->v_data);
	*new_key = &vp->v_data;

	return 0;
}
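/*
 * For illustration: since live nodes are at least sizeof(*node) bytes
 * apart, the division above yields distinct 64-bit tn_id values for
 * distinct live nodes; only the 32-bit truncation mentioned in the XXX
 * comment can collide.  The random, never-zero tn_gen additionally
 * distinguishes re-uses of the same address over time.
 */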
/*
 * tmpfs_free_node: remove the inode from the list in the mount point and
 * destroy the inode structures.
 */
void
tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
{
	size_t objsz;
	uint32_t hold;

	mutex_enter(&tmp->tm_lock);
	hold = atomic_or_32_nv(&node->tn_holdcount, TMPFS_NODE_RECLAIMED);
	/* Defer destruction to last thread holding this node. */
	if (hold != TMPFS_NODE_RECLAIMED) {
		mutex_exit(&tmp->tm_lock);
		return;
	}
	LIST_REMOVE(node, tn_entries);
	mutex_exit(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		if (node->tn_size > 0) {
			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
			    node->tn_size);
		}
		break;
	case VREG:
		/*
		 * Calculate the size of inode data, decrease the used-memory
		 * counter, and destroy the underlying UVM object (if any).
		 */
		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
		if (objsz != 0) {
			tmpfs_mem_decr(tmp, objsz);
		}
		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		}
		break;
	case VDIR:
		KASSERT(node->tn_size == 0);
		KASSERT(node->tn_spec.tn_dir.tn_seq_arena == NULL);
		KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
		    node == tmp->tm_root);
		break;
	default:
		break;
	}
	KASSERT(node->tn_vnode == NULL);
	KASSERT(node->tn_links == 0);

	mutex_destroy(&node->tn_timelock);
	tmpfs_node_put(tmp, node);
}
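/*
 * Illustrative sketch of the deferred-destruction protocol (assumed
 * holder-side code, not from this file): a thread that pinned the node
 * through tn_holdcount would release it roughly as
 *
 *	if (atomic_dec_32_nv(&node->tn_holdcount) == TMPFS_NODE_RECLAIMED)
 *		tmpfs_free_node(tmp, node);
 *
 * so the destruction above runs exactly once, in whichever thread is
 * last to see TMPFS_NODE_RECLAIMED with no holds remaining.
 */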
/*
 * tmpfs_construct_node: allocate a new file of the specified type and
 * add it into the parent directory.
 *
 * => Credentials of the caller are used.
 */
int
tmpfs_construct_node(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
	tmpfs_dirent_t *de, *wde;
	char *slink = NULL;
	int ssize = 0;
	int error;

	/* Allocate symlink target. */
	if (target != NULL) {
		KASSERT(vap->va_type == VLNK);
		ssize = strlen(target);
		KASSERT(ssize < MAXPATHLEN);
		if (ssize > 0) {
			slink = tmpfs_strname_alloc(tmp, ssize);
			if (slink == NULL)
				return ENOSPC;
			memcpy(slink, target, ssize);
		}
	}

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error) {
		if (slink != NULL)
			tmpfs_strname_free(tmp, slink, ssize);
		return error;
	}

	/* Allocate a vnode that represents the new file. */
	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL, vpp);
	if (error) {
		if (slink != NULL)
			tmpfs_strname_free(tmp, slink, ssize);
		tmpfs_free_dirent(tmp, de);
		return error;
	}
	error = vn_lock(*vpp, LK_EXCLUSIVE);
	if (error) {
		vrele(*vpp);
		*vpp = NULL;
		if (slink != NULL)
			tmpfs_strname_free(tmp, slink, ssize);
		tmpfs_free_dirent(tmp, de);
		return error;
	}

	node = VP_TO_TMPFS_NODE(*vpp);

	if (slink != NULL) {
		node->tn_spec.tn_lnk.tn_link = slink;
		node->tn_size = ssize;
	}

	/* Remove whiteout before adding the new entry. */
	if (cnp->cn_flags & ISWHITEOUT) {
		wde = tmpfs_dir_lookup(dnode, cnp);
		KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT);
		tmpfs_dir_detach(dnode, wde);
		tmpfs_free_dirent(tmp, wde);
	}

	/* Associate inode and attach the entry into the directory. */
	tmpfs_dir_attach(dnode, de, node);

	/* Make node opaque if requested. */
	if (cnp->cn_flags & ISWHITEOUT)
		node->tn_flags |= UF_OPAQUE;

	/* Update the parent's timestamps. */
	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);

	VOP_UNLOCK(*vpp);

	cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	return 0;
}
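/*
 * Illustrative note (callers assumed from tmpfs_vnops): both
 * tmpfs_create() and tmpfs_symlink() reduce to this function, e.g.
 *
 *	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
 *
 * with a non-NULL target only in the symlink case.
 */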
/*
 * tmpfs_alloc_dirent: allocates a new directory entry for the inode.
 * The directory entry contains a path name component.
 */
int
tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
    tmpfs_dirent_t **de)
{
	tmpfs_dirent_t *nde;

	nde = tmpfs_dirent_get(tmp);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_strname_alloc(tmp, len);
	if (nde->td_name == NULL) {
		tmpfs_dirent_put(tmp, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_seq = TMPFS_DIRSEQ_NONE;
	nde->td_node = NULL;		/* for asserts */

	*de = nde;
	return 0;
}

/*
 * tmpfs_free_dirent: free a directory entry.
 */
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
{
	KASSERT(de->td_node == NULL);
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
	tmpfs_dirent_put(tmp, de);
}

/*
 * tmpfs_dir_attach: associate a directory entry with the specified inode
 * and attach the entry to the directory.
 *
 * => Increases link count on the associated node.
 * => Increases link count on directory node if our node is VDIR.
 * => It is the caller's responsibility to check for the LINK_MAX limit.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
	vnode_t *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp != NULL);
	KASSERT(VOP_ISLOCKED(dvp));

	/* Get a new sequence number. */
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	de->td_seq = tmpfs_dir_getseq(dnode, de);

	/* Associate directory entry and the inode. */
	de->td_node = node;
	if (node != TMPFS_NODE_WHITEOUT) {
		KASSERT(node->tn_links < LINK_MAX);
		node->tn_links++;

		/* Save the hint (might overwrite). */
		node->tn_dirent_hint = de;
	} else if ((dnode->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
		/* Flag that there are whiteout entries. */
		atomic_or_32(&dnode->tn_gen, TMPFS_WHITEOUT_BIT);
	}

	/* Insert the entry into the directory (parent of inode). */
	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	KASSERT(dnode->tn_size <= __type_max(off_t) - sizeof(tmpfs_dirent_t));
	dnode->tn_size += sizeof(tmpfs_dirent_t);
	uvm_vnp_setsize(dvp, dnode->tn_size);

	if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) {
		/* Set parent. */
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
		node->tn_spec.tn_dir.tn_parent = dnode;

		/* Increase the link count of parent. */
		KASSERT(dnode->tn_links < LINK_MAX);
		dnode->tn_links++;
		events |= NOTE_LINK;

		TMPFS_VALIDATE_DIR(node);
	}
}

/*
 * tmpfs_dir_detach: disassociate directory entry and its inode,
 * and detach the entry from the directory.
 *
 * => Decreases link count on the associated node.
 * => Decreases the link count on directory node, if our node is VDIR.
 * => Triggers kqueue events here.
 *
 * => Note: the directory vnode (dnode->tn_vnode) may be NULL only if
 *    this is called by tmpfs_unmount().
 */
void
tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	tmpfs_node_t *node = de->td_node;
	vnode_t *dvp = dnode->tn_vnode;

	KASSERT(dvp == NULL || VOP_ISLOCKED(dvp));

	if (__predict_true(node != TMPFS_NODE_WHITEOUT)) {
		/* Deassociate the inode and entry. */
		node->tn_dirent_hint = NULL;

		KASSERT(node->tn_links > 0);
		node->tn_links--;

		/* If directory - decrease the link count of parent. */
		if (node->tn_type == VDIR) {
			KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
			node->tn_spec.tn_dir.tn_parent = NULL;

			KASSERT(dnode->tn_links > 0);
			dnode->tn_links--;
		}
	}
	de->td_node = NULL;

	/* Remove the entry from the directory. */
	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}
	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	KASSERT(dnode->tn_size >= sizeof(tmpfs_dirent_t));
	dnode->tn_size -= sizeof(tmpfs_dirent_t);
	tmpfs_dir_putseq(dnode, de);

	if (dvp) {
		uvm_vnp_setsize(dvp, dnode->tn_size);
	}
}
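/*
 * Illustrative sketch (an assumed caller, not part of this file): a
 * typical unlink pairs the two functions above, e.g.
 *
 *	de = tmpfs_dir_cached(node);
 *	tmpfs_dir_detach(dnode, de);
 *	tmpfs_free_dirent(tmp, de);
 *
 * (tmpfs_dir_cached() is defined below); the inode itself is then
 * destroyed once its vnode is reclaimed with tn_links == 0.
 */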
/*
 * tmpfs_dir_lookup: find a directory entry in the specified inode.
 *
 * Note that the . and .. components are not allowed as they do not
 * physically exist within directories.
 */
tmpfs_dirent_t *
tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
{
	const char *name = cnp->cn_nameptr;
	const uint16_t nlen = cnp->cn_namelen;
	tmpfs_dirent_t *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	KASSERT(nlen != 1 || !(name[0] == '.'));
	KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
	TMPFS_VALIDATE_DIR(node);

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (de->td_namelen != nlen)
			continue;
		if (memcmp(de->td_name, name, nlen) != 0)
			continue;
		break;
	}
	return de;
}

/*
 * tmpfs_dir_cached: get a cached directory entry if it is valid.  Used to
 * avoid unnecessary tmpfs_dir_lookup().
 *
 * => The vnode must be locked.
 */
tmpfs_dirent_t *
tmpfs_dir_cached(tmpfs_node_t *node)
{
	tmpfs_dirent_t *de = node->tn_dirent_hint;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (de == NULL) {
		return NULL;
	}
	KASSERT(de->td_node == node);

	/*
	 * Directories always have a valid hint.  For files, check if there
	 * are any hard links.  If there are - the hint might be invalid.
	 */
	return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de;
}
/*
 * tmpfs_dir_getseq: get a per-directory sequence number for the entry.
 *
 * => Shall not be larger than 2^31 for linux32 compatibility.
 */
uint32_t
tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	uint32_t seq = de->td_seq;
	vmem_t *seq_arena;
	vmem_addr_t off;
	int error __diagused;

	TMPFS_VALIDATE_DIR(dnode);

	if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) {
		/* Already set. */
		KASSERT(seq >= TMPFS_DIRSEQ_START);
		return seq;
	}

	/*
	 * The "." and ".." entries and the end-of-directory marker have
	 * reserved numbers.  The other sequence numbers are allocated as
	 * follows:
	 *
	 * - The first half of the 2^31 is assigned incrementally.
	 *
	 * - If that range is exceeded, then the second half of 2^31
	 *   is used, but managed by vmem(9).
	 */

	seq = dnode->tn_spec.tn_dir.tn_next_seq;
	KASSERT(seq >= TMPFS_DIRSEQ_START);

	if (__predict_true(seq < TMPFS_DIRSEQ_END)) {
		/* First half: just increment and return. */
		dnode->tn_spec.tn_dir.tn_next_seq++;
		return seq;
	}

	/*
	 * First half exceeded, use the second half.  May need to create
	 * the vmem(9) arena for the directory first.
	 */
	if ((seq_arena = dnode->tn_spec.tn_dir.tn_seq_arena) == NULL) {
		seq_arena = vmem_create("tmpfscoo", 0,
		    TMPFS_DIRSEQ_END - 1, 1, NULL, NULL, NULL, 0,
		    VM_SLEEP, IPL_NONE);
		dnode->tn_spec.tn_dir.tn_seq_arena = seq_arena;
		KASSERT(seq_arena != NULL);
	}
	error = vmem_alloc(seq_arena, 1, VM_SLEEP | VM_BESTFIT, &off);
	KASSERT(error == 0);

	KASSERT(off < TMPFS_DIRSEQ_END);
	seq = off | TMPFS_DIRSEQ_END;
	return seq;
}
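/*
 * For illustration, the resulting sequence number space is:
 *
 *	TMPFS_DIRSEQ_DOT, TMPFS_DIRSEQ_DOTDOT, TMPFS_DIRSEQ_EOF: reserved;
 *	[TMPFS_DIRSEQ_START, TMPFS_DIRSEQ_END): incremental tn_next_seq;
 *	seq >= TMPFS_DIRSEQ_END: a vmem(9) offset OR-ed with
 *	    TMPFS_DIRSEQ_END.
 *
 * These values double as the uio_offset cookies seen by userland (e.g.
 * via telldir(3)), which is why they must remain stable for as long as
 * the entry exists.
 */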
static void
tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	vmem_t *seq_arena = dnode->tn_spec.tn_dir.tn_seq_arena;
	uint32_t seq = de->td_seq;

	TMPFS_VALIDATE_DIR(dnode);

	if (seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END) {
		/* First half (or no sequence number set yet). */
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
	} else {
		/* Second half. */
		KASSERT(seq_arena != NULL);
		KASSERT(seq >= TMPFS_DIRSEQ_END);
		seq &= ~TMPFS_DIRSEQ_END;
		vmem_free(seq_arena, seq, 1);
	}
	de->td_seq = TMPFS_DIRSEQ_NONE;

	/* Empty?  We can reset. */
	if (seq_arena && dnode->tn_size == 0) {
		dnode->tn_spec.tn_dir.tn_seq_arena = NULL;
		dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
		vmem_destroy(seq_arena);
	}
}

/*
 * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number.
 */
tmpfs_dirent_t *
tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq)
{
	tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp;

	TMPFS_VALIDATE_DIR(node);

	/*
	 * First, check the cache.  If it does not match - perform a lookup.
	 */
	if (de && de->td_seq == seq) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		return de;
	}
	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		if (de->td_seq == seq)
			return de;
	}
	return NULL;
}

/*
 * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the
 * dot meta entries, that is, "." or "..".  Copy them to the UIO space.
 */
static int
tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio)
{
	tmpfs_dirent_t *de;
	off_t next = 0;
	int error;

	switch (uio->uio_offset) {
	case TMPFS_DIRSEQ_DOT:
		dp->d_fileno = node->tn_id;
		strlcpy(dp->d_name, ".", sizeof(dp->d_name));
		next = TMPFS_DIRSEQ_DOTDOT;
		break;
	case TMPFS_DIRSEQ_DOTDOT:
		dp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
		strlcpy(dp->d_name, "..", sizeof(dp->d_name));
		de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
		next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
		break;
	default:
		KASSERT(false);
	}
	dp->d_type = DT_DIR;
	dp->d_namlen = strlen(dp->d_name);
	dp->d_reclen = _DIRENT_SIZE(dp);

	if (dp->d_reclen > uio->uio_resid) {
		return EJUSTRETURN;
	}
	if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) {
		return error;
	}

	uio->uio_offset = next;
	return error;
}
/*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
 *
 * => Returns as many directory entries as can fit in the uio space.
 * => The read starts at uio->uio_offset.
 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp)
{
	tmpfs_dirent_t *de;
	struct dirent dent;
	int error = 0;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);

	/*
	 * First check for the "." and ".." cases.
	 * Note: tmpfs_dir_getdotents() will "seek" for us.
	 */
	memset(&dent, 0, sizeof(dent));

	if (uio->uio_offset == TMPFS_DIRSEQ_DOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
		(*cntp)++;
	}
	if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
		(*cntp)++;
	}

	/* Done if we reached the end. */
	if (uio->uio_offset == TMPFS_DIRSEQ_EOF) {
		goto done;
	}

	/* Locate the directory entry for the given sequence number. */
	de = tmpfs_dir_lookupbyseq(node, uio->uio_offset);
	if (de == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust UIO space.
	 */
	do {
		if (de->td_node == TMPFS_NODE_WHITEOUT) {
			dent.d_fileno = 1;
			dent.d_type = DT_WHT;
		} else {
			dent.d_fileno = de->td_node->tn_id;
			dent.d_type = vtype2dt(de->td_node->tn_type);
		}
		dent.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dent.d_name));
		memcpy(dent.d_name, de->td_name, de->td_namelen);
		dent.d_name[de->td_namelen] = '\0';
		dent.d_reclen = _DIRENT_SIZE(&dent);

		if (dent.d_reclen > uio->uio_resid) {
			/* Exhausted UIO space. */
			error = EJUSTRETURN;
			break;
		}

		/* Copy out the directory entry and continue. */
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error) {
			break;
		}
		(*cntp)++;
		de = TAILQ_NEXT(de, td_entries);

	} while (uio->uio_resid > 0 && de);

	/* Cache the last entry or clear and mark EOF. */
	uio->uio_offset = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
	node->tn_spec.tn_dir.tn_readdir_lastp = de;
done:
	tmpfs_update(node->tn_vnode, TMPFS_UPDATE_ATIME);

	if (error == EJUSTRETURN) {
		/* Exhausted UIO space - just return. */
		error = 0;
	}
	KASSERT(error >= 0);
	return error;
}
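/*
 * Illustrative walk-through: for a directory holding a single entry
 * "foo", successive reads advance uio_offset through
 *
 *	TMPFS_DIRSEQ_DOT -> TMPFS_DIRSEQ_DOTDOT -> seq of "foo" ->
 *	TMPFS_DIRSEQ_EOF
 *
 * with tmpfs_dir_getdotents() handling the first two steps.
 */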
/*
 * tmpfs_reg_resize: resize the underlying UVM object associated with the
 * specified regular file.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
	size_t newpages, oldpages;
	off_t oldsize;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	if (newsize > __type_max(off_t) - PAGE_SIZE + 1)
		return EFBIG;

	oldsize = node->tn_size;
	oldpages = round_page(oldsize) >> PAGE_SHIFT;
	newpages = round_page(newsize) >> PAGE_SHIFT;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);

	if (newsize == oldsize) {
		return 0;
	}

	if (newpages > oldpages) {
		/* Increase the used-memory counter if getting extra pages. */
		if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) {
			return ENOSPC;
		}
	} else if (newsize < oldsize) {
		size_t zerolen;

		zerolen = MIN(round_page(newsize), node->tn_size) - newsize;
		ubc_zerorange(uobj, newsize, zerolen, UBC_VNODE_FLAGS(vp));
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);

	/*
	 * Free "backing store".
	 */
	if (newpages < oldpages) {
		rw_enter(uobj->vmobjlock, RW_WRITER);
		uao_dropswap_range(uobj, newpages, oldpages);
		rw_exit(uobj->vmobjlock);

		/* Decrease the used-memory counter. */
		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
	}
	return 0;
}
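/*
 * Worked example (assuming 4 KiB pages): growing a file from 5000 to
 * 9000 bytes goes from 2 to 3 pages, so one page is charged via
 * tmpfs_mem_incr(); shrinking back to 5000 zeroes bytes 5000..8191 of
 * the last kept page with ubc_zerorange(), drops the swap slots of the
 * now-unused page range and returns the memory via tmpfs_mem_decr().
 */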
/*
 * tmpfs_chflags: change flags of the given vnode.
 */
int
tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
	int error;
	bool changing_sysflags = false;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/*
	 * If the new flags have non-user flags that are different than
	 * those on the node, we need special permission to change them.
	 */
	if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
		action |= KAUTH_VNODE_WRITE_SYSFLAGS;
		changing_sysflags = true;
	}

	/*
	 * Indicate that this node's flags have system attributes in them if
	 * that's the case.
	 */
	if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
		action |= KAUTH_VNODE_HAS_SYSFLAGS;
	}

	error = kauth_authorize_vnode(cred, action, vp, NULL,
	    genfs_can_chflags(vp, cred, node->tn_uid, changing_sysflags));
	if (error)
		return error;

	/*
	 * Set the flags.  If we're not setting non-user flags, be careful not
	 * to overwrite them.
	 *
	 * XXX: Can't we always assign here?  If the system flags are
	 *	different, the code above should catch attempts to change them
	 *	without proper permissions, and if we're here it means it's
	 *	okay to change them...
	 */
	if (!changing_sysflags) {
		/* Clear all user-settable flags and re-set them. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	} else {
		node->tn_flags = flags;
	}
	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
	return 0;
}

/*
 * tmpfs_chmod: change access mode on the given vnode.
 */
int
tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
	    NULL, genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid, mode));
	if (error) {
		return error;
	}
	node->tn_mode = (mode & ALLPERMS);
	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
	cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid, true);
	return 0;
}
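/*
 * Note on the kauth(9) pattern used above and below: genfs_can_*()
 * computes the decision that traditional Unix semantics would give, and
 * kauth_authorize_vnode() passes it to the active security models as a
 * fall-back result, so a secmodel may still deny (or grant) the
 * operation.
 */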
/*
 * tmpfs_chown: change ownership of the given vnode.
 *
 * => At least one of uid or gid must be different than VNOVAL.
 * => The attribute is unchanged for the VNOVAL case.
 */
int
tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL) {
		uid = node->tn_uid;
	}
	if (gid == VNOVAL) {
		gid = node->tn_gid;
	}

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
	    NULL, genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid,
	    gid));
	if (error) {
		return error;
	}
	node->tn_uid = uid;
	node->tn_gid = gid;
	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
	cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid, true);
	return 0;
}

/*
 * tmpfs_chsize: change size of the given vnode.
 */
int
tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	const off_t length = size;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Decide whether this is a valid operation based on the file type. */
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;
	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
			return EROFS;
		}
		break;
	case VBLK:
	case VCHR:
	case VFIFO:
		/*
		 * Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return 0;
	default:
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		return EPERM;
	}

	if (length < 0) {
		return EINVAL;
	}

	/* Note: tmpfs_reg_resize() will raise NOTE_EXTEND and NOTE_ATTRIB. */
	if (node->tn_size != length &&
	    (error = tmpfs_reg_resize(vp, length)) != 0) {
		return error;
	}
	tmpfs_update(vp, TMPFS_UPDATE_CTIME | TMPFS_UPDATE_MTIME);
	return 0;
}
/*
 * tmpfs_chtimes: change access and modification times for vnode.
 */
int
tmpfs_chtimes(vnode_t *vp, const struct timespec *atime,
    const struct timespec *mtime, const struct timespec *btime,
    int vaflags, kauth_cred_t cred, lwp_t *l)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
	    genfs_can_chtimes(vp, cred, node->tn_uid, vaflags));
	if (error)
		return error;

	mutex_enter(&node->tn_timelock);
	if (atime->tv_sec != VNOVAL) {
		atomic_and_uint(&node->tn_tflags, ~TMPFS_UPDATE_ATIME);
		node->tn_atime = *atime;
	}
	if (mtime->tv_sec != VNOVAL) {
		atomic_and_uint(&node->tn_tflags, ~TMPFS_UPDATE_MTIME);
		node->tn_mtime = *mtime;
	}
	if (btime->tv_sec != VNOVAL) {
		node->tn_birthtime = *btime;
	}
	mutex_exit(&node->tn_timelock);
	return 0;
}

/*
 * tmpfs_update_locked: update the timestamps as indicated by the flags.
 */
void
tmpfs_update_locked(vnode_t *vp, unsigned tflags)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct timespec nowtm;

	KASSERT(mutex_owned(&node->tn_timelock));

	if ((tflags |= atomic_swap_uint(&node->tn_tflags, 0)) == 0) {
		return;
	}
	vfs_timestamp(&nowtm);

	if (tflags & TMPFS_UPDATE_ATIME) {
		node->tn_atime = nowtm;
	}
	if (tflags & TMPFS_UPDATE_MTIME) {
		node->tn_mtime = nowtm;
	}
	if (tflags & TMPFS_UPDATE_CTIME) {
		node->tn_ctime = nowtm;
	}
}

/*
 * tmpfs_update: update the timestamps as indicated by the flags.
 */
void
tmpfs_update(vnode_t *vp, unsigned tflags)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);

	if ((tflags | atomic_load_relaxed(&node->tn_tflags)) == 0) {
		return;
	}

	mutex_enter(&node->tn_timelock);
	tmpfs_update_locked(vp, tflags);
	mutex_exit(&node->tn_timelock);
}

/*
 * tmpfs_update_lazily: schedule a deferred timestamp update.
 */
void
tmpfs_update_lazily(vnode_t *vp, unsigned tflags)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	unsigned cur;

	cur = atomic_load_relaxed(&node->tn_tflags);
	if ((cur & tflags) != tflags) {
		atomic_or_uint(&node->tn_tflags, tflags);
		return;
	}
}
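/*
 * Illustrative sketch (an assumed caller, not part of this file): a hot
 * path such as read(2) can defer the access-time update cheaply with
 *
 *	tmpfs_update_lazily(vp, TMPFS_UPDATE_ATIME);
 *
 * and the pending tn_tflags bits are later folded into real timestamps
 * by the atomic_swap_uint() in tmpfs_update_locked() on the next
 * tmpfs_update() call.
 */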