/*	$NetBSD: vfs_vnops.c,v 1.246 2025/07/09 07:39:39 bad Exp $	*/

/*-
 * Copyright (c) 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.14 (Berkeley) 6/15/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.246 2025/07/09 07:39:39 bad Exp $");

#include "veriexec.h"

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fstrans.h>
#include <sys/ioctl.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/tty.h>
#include <sys/verified_exec.h>
#include <sys/vnode_impl.h>
#include <sys/wapbl.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_device.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_readahead.h>

#ifndef COMPAT_ZERODEV
#define COMPAT_ZERODEV(dev)	(0)
#endif

int (*vn_union_readdir_hook)(struct vnode **, struct file *, struct lwp *);

static int vn_read(file_t *fp, off_t *offset, struct uio *uio,
    kauth_cred_t cred, int flags);
static int vn_write(file_t *fp, off_t *offset, struct uio *uio,
    kauth_cred_t cred, int flags);
static int vn_closefile(file_t *fp);
static int vn_poll(file_t *fp, int events);
static int vn_fcntl(file_t *fp, u_int com, void *data);
static int vn_statfile(file_t *fp, struct stat *sb);
static int vn_ioctl(file_t *fp, u_long com, void *data);
static int vn_mmap(struct file *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);
static int vn_seek(struct file *, off_t, int, off_t *, int);
static int vn_advlock(struct file *, void *, int, struct flock *, int);
static int vn_fpathconf(struct file *, int, register_t *);
static int vn_posix_fadvise(struct file *, off_t, off_t, int);
static int vn_truncate(file_t *, off_t);

const struct fileops vnops = {
	.fo_name = "vn",
	.fo_read = vn_read,
	.fo_write = vn_write,
	.fo_ioctl = vn_ioctl,
	.fo_fcntl = vn_fcntl,
	.fo_poll = vn_poll,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_kqfilter = vn_kqfilter,
	.fo_restart = fnullop_restart,
	.fo_mmap = vn_mmap,
	.fo_seek = vn_seek,
	.fo_advlock = vn_advlock,
	.fo_fpathconf = vn_fpathconf,
	.fo_posix_fadvise = vn_posix_fadvise,
	.fo_truncate = vn_truncate,
};

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * at_dvp is the directory for openat(), if any.
 * pb is the path.
 * nmode is additional namei flags, restricted to TRYEMULROOT and NOCHROOT.
 * fmode is the open flags, converted from O_* to F*
 * cmode is the creation file permissions.
 *
 * XXX shouldn't cmode be mode_t?
 *
 * On success produces either a locked vnode in *ret_vp, or NULL in
 * *ret_vp and a file descriptor number in *ret_fd.
 *
 * The caller may pass NULL for ret_fd (and ret_domove), in which case
 * EOPNOTSUPP will be produced in the cases that would otherwise return
 * a file descriptor.
 *
 * Note that callers that want no-follow behavior should pass
 * O_NOFOLLOW in fmode. Neither FOLLOW nor NOFOLLOW in nmode is
 * honored.
 */
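/*
 * Typical in-kernel usage (sketch only; vn_bdev_openpath() below does
 * essentially this): open a path for read/write with no extra namei
 * flags, then unlock and later close the vnode again:
 *
 *	error = vn_open(NULL, pb, 0, FREAD | FWRITE, 0, &vp, NULL, NULL);
 *	if (error == 0) {
 *		VOP_UNLOCK(vp);
 *		...
 *		(void)vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
 *	}
 */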
int
vn_open(struct vnode *at_dvp, struct pathbuf *pb,
    int nmode, int fmode, int cmode,
    struct vnode **ret_vp, bool *ret_domove, int *ret_fd)
{
	struct nameidata nd;
	struct vnode *vp = NULL;
	struct lwp *l = curlwp;
	kauth_cred_t cred = l->l_cred;
	struct vattr va;
	int error;
	const char *pathstring;

	KASSERT((nmode & (TRYEMULROOT | NOCHROOT)) == nmode);

	KASSERT(ret_vp != NULL);
	KASSERT((ret_domove == NULL) == (ret_fd == NULL));

	if ((fmode & (O_CREAT | O_DIRECTORY)) == (O_CREAT | O_DIRECTORY))
		return SET_ERROR(EINVAL);

	NDINIT(&nd, LOOKUP, nmode, pb);
	if (at_dvp != NULL)
		NDAT(&nd, at_dvp);

	nd.ni_cnd.cn_flags &= TRYEMULROOT | NOCHROOT;

	if (fmode & O_CREAT) {
		nd.ni_cnd.cn_nameiop = CREATE;
		nd.ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 &&
		    ((fmode & O_NOFOLLOW) == 0))
			nd.ni_cnd.cn_flags |= FOLLOW;
		if ((fmode & O_EXCL) == 0)
			nd.ni_cnd.cn_flags |= NONEXCLHACK;
	} else {
		nd.ni_cnd.cn_nameiop = LOOKUP;
		nd.ni_cnd.cn_flags |= LOCKLEAF;
		if ((fmode & O_NOFOLLOW) == 0)
			nd.ni_cnd.cn_flags |= FOLLOW;
	}

	pathstring = pathbuf_stringcopy_get(nd.ni_pathbuf);
	if (pathstring == NULL) {
		return SET_ERROR(ENOMEM);
	}

	/*
	 * When this "interface" was exposed to do_open() it used
	 * to initialize l_dupfd to -newfd-1 (thus passing in the
	 * new file handle number to use)... but nothing in the
	 * kernel uses that value. So just send 0.
	 */
	l->l_dupfd = 0;

	error = namei(&nd);
	if (error)
		goto out;

	vp = nd.ni_vp;

#if NVERIEXEC > 0
	error = veriexec_openchk(l, nd.ni_vp, pathstring, fmode);
	if (error) {
		/* We have to release the locks ourselves */
		/*
		 * 20210604 dholland passing NONEXCLHACK means we can
		 * get ni_dvp == NULL back if ni_vp exists, and we should
		 * treat that like the non-O_CREAT case.
		 */
		if ((fmode & O_CREAT) != 0 && nd.ni_dvp != NULL) {
			if (vp == NULL) {
				vput(nd.ni_dvp);
			} else {
				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
				if (nd.ni_dvp == nd.ni_vp)
					vrele(nd.ni_dvp);
				else
					vput(nd.ni_dvp);
				nd.ni_dvp = NULL;
				vput(vp);
				vp = NULL;
			}
		} else {
			vput(vp);
			vp = NULL;
		}
		goto out;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * 20210604 dholland ditto
	 */
	if ((fmode & O_CREAT) != 0 && nd.ni_dvp != NULL) {
		if (nd.ni_vp == NULL) {
			vattr_null(&va);
			va.va_type = VREG;
			va.va_mode = cmode;
			if (fmode & O_EXCL)
				va.va_vaflags |= VA_EXCLUSIVE;
			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &va);
			if (error) {
				vput(nd.ni_dvp);
				goto out;
			}
			fmode &= ~O_TRUNC;
			vp = nd.ni_vp;
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			vput(nd.ni_dvp);
		} else {
			VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			nd.ni_dvp = NULL;
			vp = nd.ni_vp;
			if (fmode & O_EXCL) {
				error = SET_ERROR(EEXIST);
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else if ((fmode & O_CREAT) != 0) {
		/*
		 * 20210606 dholland passing NONEXCLHACK means this
		 * case exists; it is the same as the following one
		 * but also needs to do things in the second (exists)
		 * half of the following block. (Besides handle
		 * ni_dvp, anyway.)
		 */
		vp = nd.ni_vp;
		KASSERT((fmode & O_EXCL) == 0);
		fmode &= ~O_CREAT;
	} else {
		vp = nd.ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = SET_ERROR(EOPNOTSUPP);
		goto bad;
	}
	if (nd.ni_vp->v_type == VLNK) {
		error = SET_ERROR(EFTYPE);
		goto bad;
	}

	if ((fmode & O_CREAT) == 0) {
		error = vn_openchk(vp, cred, fmode);
		if (error != 0)
			goto bad;
	}

	if (fmode & O_TRUNC) {
		vattr_null(&va);
		va.va_size = 0;
		error = VOP_SETATTR(vp, &va, cred);
		if (error != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, fmode, cred)) != 0)
		goto bad;
	if (fmode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

bad:
	if (error) {
		vput(vp);
		vp = NULL;
	}
out:
	pathbuf_stringcopy_put(nd.ni_pathbuf, pathstring);

	switch (error) {
	case EDUPFD:
	case EMOVEFD:
		/* if the caller isn't prepared to handle fds, fail for them */
		if (ret_fd == NULL) {
			error = SET_ERROR(EOPNOTSUPP);
			break;
		}
		*ret_vp = NULL;
		*ret_domove = error == EMOVEFD;
		*ret_fd = l->l_dupfd;
		error = 0;
		break;
	case 0:
		KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
		*ret_vp = vp;
		break;
	}
	l->l_dupfd = 0;
	return error;
}

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(struct vnode *vp)
{

	/*
	 * If the vnode is in use as a process's text,
	 * we can't allow writing.
	 */
	if (vp->v_iflag & VI_TEXT)
		return SET_ERROR(ETXTBSY);
	return 0;
}

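/*
 * Check whether an already looked-up vnode may be opened with the given
 * open flags: validate the vnode type against O_DIRECTORY and O_REGULAR,
 * map FREAD/FEXEC/FWRITE/O_TRUNC to VREAD/VEXEC/VWRITE permission bits,
 * and finish with VOP_ACCESS().
 */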
int
vn_openchk(struct vnode *vp, kauth_cred_t cred, int fflags)
{
	int permbits = 0;
	int error;

	if (vp->v_type == VNON || vp->v_type == VBAD)
		return SET_ERROR(ENXIO);

	if ((fflags & O_DIRECTORY) != 0 && vp->v_type != VDIR)
		return SET_ERROR(ENOTDIR);

	if ((fflags & O_REGULAR) != 0 && vp->v_type != VREG)
		return SET_ERROR(EFTYPE);

	if ((fflags & FREAD) != 0) {
		permbits = VREAD;
	}
	if ((fflags & FEXEC) != 0) {
		permbits |= VEXEC;
	}
	if ((fflags & (FWRITE | O_TRUNC)) != 0) {
		permbits |= VWRITE;
		if (vp->v_type == VDIR) {
			error = SET_ERROR(EISDIR);
			goto bad;
		}
		error = vn_writechk(vp);
		if (error != 0)
			goto bad;
	}
	error = VOP_ACCESS(vp, permbits, cred);
bad:
	return error;
}

/*
 * Mark a vnode as having executable mappings.
 */
void
vn_markexec(struct vnode *vp)
{

	if ((vp->v_iflag & VI_EXECMAP) != 0) {
		/* Safe unlocked, as long as caller holds a reference. */
		return;
	}

	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_EXECMAP) == 0) {
		cpu_count(CPU_COUNT_EXECPAGES, vp->v_uobj.uo_npages);
		vp->v_iflag |= VI_EXECMAP;
	}
	mutex_exit(vp->v_interlock);
	rw_exit(vp->v_uobj.vmobjlock);
}

/*
 * Mark a vnode as being the text of a process.
 * Fail if the vnode is currently writable.
 */
int
vn_marktext(struct vnode *vp)
{

	if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP)) == (VI_TEXT|VI_EXECMAP)) {
		/* Safe unlocked, as long as caller holds a reference. */
		return 0;
	}

	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
	mutex_enter(vp->v_interlock);
	if (vp->v_writecount != 0) {
		KASSERT((vp->v_iflag & VI_TEXT) == 0);
		mutex_exit(vp->v_interlock);
		rw_exit(vp->v_uobj.vmobjlock);
		return SET_ERROR(ETXTBSY);
	}
	if ((vp->v_iflag & VI_EXECMAP) == 0) {
		cpu_count(CPU_COUNT_EXECPAGES, vp->v_uobj.uo_npages);
	}
	vp->v_iflag |= (VI_TEXT | VI_EXECMAP);
	mutex_exit(vp->v_interlock);
	rw_exit(vp->v_uobj.vmobjlock);
	return 0;
}

/*
 * Vnode close call
 *
 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node.
 */
int
vn_close(struct vnode *vp, int flags, kauth_cred_t cred)
{
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (flags & FWRITE) {
		mutex_enter(vp->v_interlock);
		KASSERT(vp->v_writecount > 0);
		vp->v_writecount--;
		mutex_exit(vp->v_interlock);
	}
	error = VOP_CLOSE(vp, flags, cred);
	vput(vp);
	return error;
}

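/*
 * Enforce RLIMIT_FSIZE for a pending write on a regular file: if the
 * write would extend the file beyond the process's soft limit, post
 * SIGXFSZ to the process and fail with EFBIG.
 */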
static int
enforce_rlimit_fsize(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct lwp *l = curlwp;
	off_t testoff;

	if (uio->uio_rw != UIO_WRITE || vp->v_type != VREG)
		return 0;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	if (ioflag & IO_APPEND)
		testoff = vp->v_size;
	else
		testoff = uio->uio_offset;

	if (testoff + uio->uio_resid >
	    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		mutex_enter(&proc_lock);
		psignal(l->l_proc, SIGXFSZ);
		mutex_exit(&proc_lock);
		return SET_ERROR(EFBIG);
	}

	return 0;
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset,
    enum uio_seg segflg, int ioflg, kauth_cred_t cred, size_t *aresid,
    struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		if (rw == UIO_READ) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
		} else /* UIO_WRITE */ {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		}
	}
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_rw = rw;
	if (segflg == UIO_SYSSPACE) {
		UIO_SETUP_SYSSPACE(&auio);
	} else {
		auio.uio_vmspace = l->l_proc->p_vmspace;
	}

	if ((error = enforce_rlimit_fsize(vp, &auio, ioflg)) != 0)
		goto out;

	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}

	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = SET_ERROR(EIO);

out:
	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}
	return error;
}

int
vn_readdir(file_t *fp, char *bf, int segflg, u_int count, int *done,
    struct lwp *l, off_t **cookies, int *ncookies)
{
	struct vnode *vp = fp->f_vnode;
	struct iovec aiov;
	struct uio auio;
	int error, eofflag;

	/* Limit the size on any kernel buffers used by VOP_READDIR */
	count = uimin(MAXBSIZE, count);

unionread:
	if (vp->v_type != VDIR)
		return SET_ERROR(EINVAL);
	aiov.iov_base = bf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	if (segflg == UIO_SYSSPACE) {
		UIO_SETUP_SYSSPACE(&auio);
	} else {
		KASSERT(l == curlwp);
		auio.uio_vmspace = l->l_proc->p_vmspace;
	}
	auio.uio_resid = count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	mutex_enter(&fp->f_lock);
	auio.uio_offset = fp->f_offset;
	mutex_exit(&fp->f_lock);
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies,
	    ncookies);
	mutex_enter(&fp->f_lock);
	fp->f_offset = auio.uio_offset;
	mutex_exit(&fp->f_lock);
	VOP_UNLOCK(vp);
	if (error)
		return error;

	if (count == auio.uio_resid && vn_union_readdir_hook) {
		struct vnode *ovp = vp;

		error = (*vn_union_readdir_hook)(&vp, fp, l);
		if (error)
			return error;
		if (vp != ovp)
			goto unionread;
	}

	if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		vref(vp);
		mutex_enter(&fp->f_lock);
		fp->f_vnode = vp;
		fp->f_offset = 0;
		mutex_exit(&fp->f_lock);
		vrele(tvp);
		goto unionread;
	}
	*done = count - auio.uio_resid;
	return error;
}

/*
 * File table vnode read routine.
 */
static int
vn_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct vnode *vp = fp->f_vnode;
	int error, ioflag, fflag;
	size_t count;

	ioflag = IO_ADV_ENCODE(fp->f_advice);
	fflag = fp->f_flag;
	if (fflag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fflag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC))
		ioflag |= IO_SYNC;
	if (fflag & FALTIO)
		ioflag |= IO_ALTSEMANTICS;
	if (fflag & FDIRECT)
		ioflag |= IO_DIRECT;
	if (offset == &fp->f_offset && (flags & FOF_UPDATE_OFFSET) != 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	else
		vn_lock(vp, LK_SHARED | LK_RETRY);
	if (__predict_false(vp->v_type == VDIR) &&
	    offset == &fp->f_offset && (flags & FOF_UPDATE_OFFSET) == 0)
		mutex_enter(&fp->f_lock);
	uio->uio_offset = *offset;
	if (__predict_false(vp->v_type == VDIR) &&
	    offset == &fp->f_offset && (flags & FOF_UPDATE_OFFSET) == 0)
		mutex_exit(&fp->f_lock);
	count = uio->uio_resid;
	error = VOP_READ(vp, uio, ioflag, cred);
	if (flags & FOF_UPDATE_OFFSET)
		*offset += count - uio->uio_resid;
	VOP_UNLOCK(vp);
	return error;
}

/*
 * File table vnode write routine.
 */
static int
vn_write(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct vnode *vp = fp->f_vnode;
	int error, ioflag, fflag;
	size_t count;

	ioflag = IO_ADV_ENCODE(fp->f_advice) | IO_UNIT;
	fflag = fp->f_flag;
	if (vp->v_type == VREG && (fflag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fflag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fflag & FFSYNC ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	else if (fflag & FDSYNC)
		ioflag |= IO_DSYNC;
	if (fflag & FALTIO)
		ioflag |= IO_ALTSEMANTICS;
	if (fflag & FDIRECT)
		ioflag |= IO_DIRECT;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	uio->uio_offset = *offset;
	count = uio->uio_resid;

	if ((error = enforce_rlimit_fsize(vp, uio, ioflag)) != 0)
		goto out;

	error = VOP_WRITE(vp, uio, ioflag, cred);

	if (flags & FOF_UPDATE_OFFSET) {
		if (ioflag & IO_APPEND) {
			/*
			 * SUSv3 describes behaviour for count = 0 as
			 * following: "Before any action ... is taken,
			 * and if nbyte is zero and the file is a
			 * regular file, the write() function ... in
			 * the absence of errors ... shall return zero
			 * and have no other results."
			 */
			if (count)
				*offset = uio->uio_offset;
		} else
			*offset += count - uio->uio_resid;
	}

out:
	VOP_UNLOCK(vp);
	return error;
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(file_t *fp, struct stat *sb)
{
	struct vnode *vp = fp->f_vnode;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = vn_stat(vp, sb);
	VOP_UNLOCK(vp);
	return error;
}

int
vn_stat(struct vnode *vp, struct stat *sb)
{
	struct vattr va;
	int error;
	mode_t mode;

	memset(&va, 0, sizeof(va));
	error = VOP_GETATTR(vp, &va, kauth_cred_get());
	if (error)
		return error;
	/*
	 * Copy from vattr table
	 */
	memset(sb, 0, sizeof(*sb));
	sb->st_dev = va.va_fsid;
	sb->st_ino = va.va_fileid;
	mode = va.va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return SET_ERROR(EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = va.va_nlink;
	sb->st_uid = va.va_uid;
	sb->st_gid = va.va_gid;
	sb->st_rdev = va.va_rdev;
	sb->st_size = va.va_size;
	sb->st_atimespec = va.va_atime;
	sb->st_mtimespec = va.va_mtime;
	sb->st_ctimespec = va.va_ctime;
	sb->st_birthtimespec = va.va_birthtime;
	sb->st_blksize = va.va_blocksize;
	sb->st_flags = va.va_flags;
	sb->st_gen = 0;
	sb->st_blocks = va.va_bytes / S_BLKSIZE;
	return 0;
}

/*
 * File table vnode fcntl routine.
 */
static int
vn_fcntl(file_t *fp, u_int com, void *data)
{
	struct vnode *vp = fp->f_vnode;
	int error;

	error = VOP_FCNTL(vp, com, data, fp->f_flag, kauth_cred_get());
	return error;
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(file_t *fp, u_long com, void *data)
{
	struct vnode *vp = fp->f_vnode, *ovp;
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
			error = VOP_GETATTR(vp, &vattr, kauth_cred_get());
			if (error == 0) {
				if (vp->v_type == VDIR)
					mutex_enter(&fp->f_lock);
				*(int *)data = vattr.va_size - fp->f_offset;
				if (vp->v_type == VDIR)
					mutex_exit(&fp->f_lock);
			}
			VOP_UNLOCK(vp);
			if (error)
				return error;
			return 0;
		}
		if ((com == FIONWRITE) || (com == FIONSPACE)) {
			/*
			 * Files don't have send queues, so there never
			 * are any bytes in them, nor is there any
			 * open space in them.
			 */
			*(int *)data = 0;
			return 0;
		}
		if (com == FIOGETBMAP) {
			daddr_t *block;

			if (*(daddr_t *)data < 0)
				return SET_ERROR(EINVAL);
			block = (daddr_t *)data;
			vn_lock(vp, LK_SHARED | LK_RETRY);
			error = VOP_BMAP(vp, *block, NULL, block, NULL);
			VOP_UNLOCK(vp);
			return error;
		}
		if (com == OFIOGETBMAP) {
			daddr_t ibn, obn;

			if (*(int32_t *)data < 0)
				return SET_ERROR(EINVAL);
			ibn = (daddr_t)*(int32_t *)data;
			vn_lock(vp, LK_SHARED | LK_RETRY);
			error = VOP_BMAP(vp, ibn, NULL, &obn, NULL);
			VOP_UNLOCK(vp);
			*(int32_t *)data = (int32_t)obn;
			return error;
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return 0;			/* XXX */
		/* FALLTHROUGH */
	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag, kauth_cred_get());
		if (error == 0 && com == TIOCSCTTY) {
			vref(vp);
			mutex_enter(&proc_lock);
			ovp = curproc->p_session->s_ttyvp;
			curproc->p_session->s_ttyvp = vp;
			mutex_exit(&proc_lock);
			if (ovp != NULL)
				vrele(ovp);
		}
		return error;

	default:
		return SET_ERROR(EPASSTHROUGH);
	}
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(file_t *fp, int events)
{

	return VOP_POLL(fp->f_vnode, events);
}

/*
 * File table vnode kqfilter routine.
 */
int
vn_kqfilter(file_t *fp, struct knote *kn)
{

	return VOP_KQFILTER(fp->f_vnode, kn);
}

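/*
 * File table vnode mmap routine.  Validates the request and hands back,
 * via the out parameters, the uvm_object to map (the vnode itself or a
 * device pager), the maximum protection, the advice and possibly
 * adjusted flags for the caller to establish the mapping with.
 */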
static int
vn_mmap(struct file *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct uvm_object *uobj;
	struct vnode *vp;
	struct vattr va;
	struct lwp *l;
	vm_prot_t maxprot;
	off_t off;
	int error, flags;
	bool needwritemap;

	l = curlwp;

	off = *offp;
	flags = *flagsp;
	maxprot = VM_PROT_EXECUTE;

	KASSERT(size > 0);

	vp = fp->f_vnode;
	if (vp->v_type != VREG && vp->v_type != VCHR &&
	    vp->v_type != VBLK) {
		/* only REG/CHR/BLK support mmap */
		return SET_ERROR(ENODEV);
	}
	if (vp->v_type != VCHR && off < 0) {
		return SET_ERROR(EINVAL);
	}
#if SIZE_MAX > UINT32_MAX	/* XXX -Wtype-limits */
	if (vp->v_type != VCHR && size > __type_max(off_t)) {
		return SET_ERROR(EOVERFLOW);
	}
#endif
	if (vp->v_type != VCHR && off > __type_max(off_t) - size) {
		/* no offset wrapping */
		return SET_ERROR(EOVERFLOW);
	}

	/* special case: catch SunOS style /dev/zero */
	if (vp->v_type == VCHR &&
	    (vp->v_rdev == zerodev || COMPAT_ZERODEV(vp->v_rdev))) {
		*uobjp = NULL;
		*maxprotp = VM_PROT_ALL;
		return 0;
	}

	/*
	 * Old programs may not select a specific sharing type, so
	 * default to an appropriate one.
	 *
	 * XXX: how does MAP_ANON fit in the picture?
	 */
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
#if defined(DEBUG)
		struct proc *p = l->l_proc;
		printf("WARNING: defaulted mmap() share type to "
		    "%s (pid %d command %s)\n",
		    vp->v_type == VCHR ? "MAP_SHARED" : "MAP_PRIVATE",
		    p->p_pid,
		    p->p_comm);
#endif
		if (vp->v_type == VCHR)
			flags |= MAP_SHARED;	/* for a device */
		else
			flags |= MAP_PRIVATE;	/* for a file */
	}

	/*
	 * MAP_PRIVATE device mappings don't make sense (and aren't
	 * supported anyway).  However, some programs rely on this,
	 * so just change it to MAP_SHARED.
	 */
	if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
		flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
	}

	/*
	 * now check protection
	 */

	/* check read access */
	if (fp->f_flag & FREAD)
		maxprot |= VM_PROT_READ;
	else if (prot & PROT_READ) {
		return SET_ERROR(EACCES);
	}

	/* check write access, shared case first */
	if (flags & MAP_SHARED) {
		/*
		 * if the file is writable, only add PROT_WRITE to
		 * maxprot if the file is not immutable, append-only.
		 * otherwise, if we have asked for PROT_WRITE, return
		 * EPERM.
		 */
		if (fp->f_flag & FWRITE) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
			error = VOP_GETATTR(vp, &va, l->l_cred);
			VOP_UNLOCK(vp);
			if (error) {
				return error;
			}
			if ((va.va_flags &
			    (SF_SNAPSHOT|IMMUTABLE|APPEND)) == 0)
				maxprot |= VM_PROT_WRITE;
			else if (prot & PROT_WRITE) {
				return SET_ERROR(EPERM);
			}
		} else if (prot & PROT_WRITE) {
			return SET_ERROR(EACCES);
		}
	} else {
		/* MAP_PRIVATE mappings can always write to */
		maxprot |= VM_PROT_WRITE;
	}

	/*
	 * Don't allow mmap for EXEC if the file system
	 * is mounted NOEXEC.
	 */
	if ((prot & PROT_EXEC) != 0 &&
	    (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
		return SET_ERROR(EACCES);
	}

	if (vp->v_type != VCHR) {
		error = VOP_MMAP(vp, prot, curlwp->l_cred);
		if (error) {
			return error;
		}
		vref(vp);
		uobj = &vp->v_uobj;

		/*
		 * If the vnode is being mapped with PROT_EXEC,
		 * then mark it as text.
		 */
		if (prot & PROT_EXEC) {
			vn_markexec(vp);
		}
	} else {
		int i = maxprot;

		/*
		 * XXX Some devices don't like to be mapped with
		 * XXX PROT_EXEC or PROT_WRITE, but we don't really
		 * XXX have a better way of handling this, right now
		 */
		do {
			uobj = udv_attach(vp->v_rdev,
			    (flags & MAP_SHARED) ? i : (i & ~VM_PROT_WRITE),
			    off, size);
			i--;
		} while ((uobj == NULL) && (i > 0));
		if (uobj == NULL) {
			return SET_ERROR(EINVAL);
		}
		*advicep = UVM_ADV_RANDOM;
	}

	/*
	 * Set vnode flags to indicate the new kinds of mapping.
	 * We take the vnode lock in exclusive mode here to serialize
	 * with direct I/O.
	 *
	 * Safe to check for these flag values without a lock, as
	 * long as a reference to the vnode is held.
	 */
	needwritemap = (vp->v_iflag & VI_WRMAP) == 0 &&
	    (flags & MAP_SHARED) != 0 &&
	    (maxprot & VM_PROT_WRITE) != 0;
	if ((vp->v_vflag & VV_MAPPED) == 0 || needwritemap) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vp->v_vflag |= VV_MAPPED;
		if (needwritemap) {
			rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
			mutex_enter(vp->v_interlock);
			vp->v_iflag |= VI_WRMAP;
			mutex_exit(vp->v_interlock);
			rw_exit(vp->v_uobj.vmobjlock);
		}
		VOP_UNLOCK(vp);
	}

#if NVERIEXEC > 0
	/*
	 * Check if the file can be executed indirectly.
	 *
	 * XXX: This gives false warnings about "Incorrect access type"
	 * XXX: if the mapping is not executable. Harmless, but will be
	 * XXX: fixed as part of other changes.
	 */
	if (veriexec_verify(l, vp, "(mmap)", VERIEXEC_INDIRECT,
	    NULL)) {

		/*
		 * Don't allow executable mappings if we can't
		 * indirectly execute the file.
		 */
		if (prot & VM_PROT_EXECUTE) {
			return SET_ERROR(EPERM);
		}

		/*
		 * Strip the executable bit from 'maxprot' to make sure
		 * it can't be made executable later.
		 */
		maxprot &= ~VM_PROT_EXECUTE;
	}
#endif /* NVERIEXEC > 0 */

	*uobjp = uobj;
	*maxprotp = maxprot;
	*flagsp = flags;

	return 0;
}

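/*
 * File table vnode seek routine.  Computes the new offset for
 * SEEK_SET/SEEK_CUR/SEEK_END (clamping to the off_t range on overflow),
 * lets the file system audit the change via VOP_SEEK(), and optionally
 * updates fp->f_offset.
 */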
static int
vn_seek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags)
{
	const off_t OFF_MIN = __type_min(off_t);
	const off_t OFF_MAX = __type_max(off_t);
	kauth_cred_t cred = fp->f_cred;
	off_t oldoff, newoff;
	struct vnode *vp = fp->f_vnode;
	struct vattr vattr;
	int error;

	if (vp->v_type == VFIFO)
		return SET_ERROR(ESPIPE);

	if (flags & FOF_UPDATE_OFFSET)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	else
		vn_lock(vp, LK_SHARED | LK_RETRY);

	/* Compute the old and new offsets. */
	if (vp->v_type == VDIR && (flags & FOF_UPDATE_OFFSET) == 0)
		mutex_enter(&fp->f_lock);
	oldoff = fp->f_offset;
	if (vp->v_type == VDIR && (flags & FOF_UPDATE_OFFSET) == 0)
		mutex_exit(&fp->f_lock);
	switch (whence) {
	case SEEK_CUR:
		if (delta > 0) {
			if (oldoff > 0 && delta > OFF_MAX - oldoff) {
				newoff = OFF_MAX;
				break;
			}
		} else {
			if (oldoff < 0 && delta < OFF_MIN - oldoff) {
				newoff = OFF_MIN;
				break;
			}
		}
		newoff = oldoff + delta;
		break;
	case SEEK_END:
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error)
			goto out;
		if (vattr.va_size > OFF_MAX ||
		    delta > OFF_MAX - (off_t)vattr.va_size) {
			newoff = OFF_MAX;
			break;
		}
		newoff = delta + vattr.va_size;
		break;
	case SEEK_SET:
		newoff = delta;
		break;
	default:
		error = SET_ERROR(EINVAL);
		goto out;
	}

	/* Pass the proposed change to the file system to audit. */
	error = VOP_SEEK(vp, oldoff, newoff, cred);
	if (error)
		goto out;

	/* Success! */
	if (newoffp)
		*newoffp = newoff;
	if (flags & FOF_UPDATE_OFFSET)
		fp->f_offset = newoff;
	error = 0;

out:	VOP_UNLOCK(vp);
	return error;
}

static int
vn_advlock(struct file *fp, void *id, int op, struct flock *fl, int flags)
{
	struct vnode *const vp = fp->f_vnode;

	if (fl->l_whence == SEEK_CUR) {
		vn_lock(vp, LK_SHARED | LK_RETRY);
		fl->l_start += fp->f_offset;
		VOP_UNLOCK(vp);
	}

	return VOP_ADVLOCK(vp, id, op, fl, flags);
}

static int
vn_fpathconf(struct file *fp, int name, register_t *retval)
{
	struct vnode *const vp = fp->f_vnode;
	int error;

	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_PATHCONF(vp, name, retval);
	VOP_UNLOCK(vp);

	return error;
}

static int
vn_posix_fadvise(struct file *fp, off_t offset, off_t len, int advice)
{
	const off_t OFF_MAX = __type_max(off_t);
	struct vnode *vp = fp->f_vnode;
	off_t endoffset;
	int error;

	if (offset < 0) {
		return SET_ERROR(EINVAL);
	}
	if (len == 0) {
		endoffset = OFF_MAX;
	} else if (len > 0 && (OFF_MAX - offset) >= len) {
		endoffset = offset + len;
	} else {
		return SET_ERROR(EINVAL);
	}

	CTASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
	CTASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
	CTASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);

	switch (advice) {
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		if (vp->v_type != VREG && vp->v_type != VBLK)
			return 0;
		break;
	}

	switch (advice) {
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_SEQUENTIAL:
		/*
		 * We ignore offset and size.  Must lock the file to
		 * do this, as f_advice is sub-word sized.
		 */
		mutex_enter(&fp->f_lock);
		fp->f_advice = (u_char)advice;
		mutex_exit(&fp->f_lock);
		error = 0;
		break;

	case POSIX_FADV_WILLNEED:
		error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset);
		break;

	case POSIX_FADV_DONTNEED:
		/*
		 * Align the region to page boundaries as VOP_PUTPAGES expects
		 * by shrinking it.  We shrink instead of expand because we
		 * do not want to deactivate cache outside of the requested
		 * region.  It means that if the specified region is smaller
		 * than PAGE_SIZE, we do nothing.
		 */
		if (offset <= trunc_page(OFF_MAX) &&
		    round_page(offset) < trunc_page(endoffset)) {
			rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
			error = VOP_PUTPAGES(vp,
			    round_page(offset), trunc_page(endoffset),
			    PGO_DEACTIVATE | PGO_CLEANIT);
		} else {
			error = 0;
		}
		break;

	case POSIX_FADV_NOREUSE:
		/* Not implemented yet. */
		error = 0;
		break;
	default:
		error = SET_ERROR(EINVAL);
		break;
	}

	return error;
}

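/*
 * File table vnode truncate routine (ftruncate).  Requires the file to
 * be open for writing; implemented as a VOP_SETATTR() of va_size.
 */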
*/ 1322 1.241 riastrad error = 0; 1323 1.241 riastrad break; 1324 1.241 riastrad default: 1325 1.244 riastrad error = SET_ERROR(EINVAL); 1326 1.241 riastrad break; 1327 1.241 riastrad } 1328 1.241 riastrad 1329 1.241 riastrad return error; 1330 1.241 riastrad } 1331 1.241 riastrad 1332 1.242 christos static int 1333 1.242 christos vn_truncate(file_t *fp, off_t length) 1334 1.242 christos { 1335 1.242 christos struct vattr vattr; 1336 1.242 christos struct vnode *vp; 1337 1.242 christos int error = 0; 1338 1.242 christos 1339 1.242 christos if (length < 0) 1340 1.244 riastrad return SET_ERROR(EINVAL); 1341 1.242 christos 1342 1.242 christos if ((fp->f_flag & FWRITE) == 0) 1343 1.244 riastrad return SET_ERROR(EINVAL); 1344 1.242 christos vp = fp->f_vnode; 1345 1.242 christos vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1346 1.242 christos if (vp->v_type == VDIR) 1347 1.244 riastrad error = SET_ERROR(EISDIR); 1348 1.242 christos else if ((error = vn_writechk(vp)) == 0) { 1349 1.242 christos vattr_null(&vattr); 1350 1.242 christos vattr.va_size = length; 1351 1.242 christos error = VOP_SETATTR(vp, &vattr, fp->f_cred); 1352 1.242 christos } 1353 1.242 christos VOP_UNLOCK(vp); 1354 1.242 christos 1355 1.242 christos return error; 1356 1.242 christos } 1357 1.242 christos 1358 1.242 christos 1359 1.28 fvdl /* 1360 1.28 fvdl * Check that the vnode is still valid, and if so 1361 1.28 fvdl * acquire requested lock. 1362 1.28 fvdl */ 1363 1.28 fvdl int 1364 1.89 thorpej vn_lock(struct vnode *vp, int flags) 1365 1.28 fvdl { 1366 1.205 ad struct lwp *l; 1367 1.28 fvdl int error; 1368 1.34 sommerfe 1369 1.227 hannken KASSERT(vrefcnt(vp) > 0); 1370 1.203 ad KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY| 1371 1.203 ad LK_UPGRADE|LK_DOWNGRADE)) == 0); 1372 1.203 ad KASSERT((flags & LK_NOWAIT) != 0 || !mutex_owned(vp->v_interlock)); 1373 1.80 yamt 1374 1.159 simonb #ifdef DIAGNOSTIC 1375 1.159 simonb if (wapbl_vphaswapbl(vp)) 1376 1.159 simonb WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp)); 1377 1.159 simonb #endif 1378 1.159 simonb 1379 1.205 ad /* Get a more useful report for lockstat. */ 1380 1.205 ad l = curlwp; 1381 1.205 ad KASSERT(l->l_rwcallsite == 0); 1382 1.243 riastrad l->l_rwcallsite = (uintptr_t)__builtin_return_address(0); 1383 1.205 ad 1384 1.189 hannken error = VOP_LOCK(vp, flags); 1385 1.189 hannken 1386 1.205 ad l->l_rwcallsite = 0; 1387 1.205 ad 1388 1.227 hannken switch (flags & (LK_RETRY | LK_NOWAIT)) { 1389 1.227 hannken case 0: 1390 1.227 hannken KASSERT(error == 0 || error == ENOENT); 1391 1.227 hannken break; 1392 1.227 hannken case LK_RETRY: 1393 1.227 hannken KASSERT(error == 0); 1394 1.227 hannken break; 1395 1.227 hannken case LK_NOWAIT: 1396 1.227 hannken KASSERT(error == 0 || error == EBUSY || error == ENOENT); 1397 1.227 hannken break; 1398 1.227 hannken case LK_RETRY | LK_NOWAIT: 1399 1.227 hannken KASSERT(error == 0 || error == EBUSY); 1400 1.227 hannken break; 1401 1.227 hannken } 1402 1.189 hannken 1403 1.189 hannken return error; 1404 1.10 cgd } 1405 1.10 cgd 1406 1.10 cgd /* 1407 1.10 cgd * File table vnode close routine. 1408 1.10 cgd */ 1409 1.56 gmcgarry static int 1410 1.155 ad vn_closefile(file_t *fp) 1411 1.10 cgd { 1412 1.10 cgd 1413 1.191 matt return vn_close(fp->f_vnode, fp->f_flag, fp->f_cred); 1414 1.39 fvdl } 1415 1.39 fvdl 1416 1.39 fvdl /* 1417 1.85 thorpej * Simplified in-kernel wrapper calls for extended attribute access. 1418 1.85 thorpej * Both calls pass in a NULL credential, authorizing a "kernel" access. 
1419 1.85 thorpej * Set IO_NODELOCKED in ioflg if the vnode is already locked. 1420 1.85 thorpej */ 1421 1.85 thorpej int 1422 1.85 thorpej vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, 1423 1.101 christos const char *attrname, size_t *buflen, void *bf, struct lwp *l) 1424 1.85 thorpej { 1425 1.85 thorpej struct uio auio; 1426 1.85 thorpej struct iovec aiov; 1427 1.85 thorpej int error; 1428 1.85 thorpej 1429 1.85 thorpej aiov.iov_len = *buflen; 1430 1.88 christos aiov.iov_base = bf; 1431 1.85 thorpej 1432 1.85 thorpej auio.uio_iov = &aiov; 1433 1.85 thorpej auio.uio_iovcnt = 1; 1434 1.85 thorpej auio.uio_rw = UIO_READ; 1435 1.85 thorpej auio.uio_offset = 0; 1436 1.85 thorpej auio.uio_resid = *buflen; 1437 1.106 yamt UIO_SETUP_SYSSPACE(&auio); 1438 1.85 thorpej 1439 1.85 thorpej if ((ioflg & IO_NODELOCKED) == 0) 1440 1.85 thorpej vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1441 1.86 perry 1442 1.208 christos error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, 1443 1.208 christos NOCRED); 1444 1.86 perry 1445 1.85 thorpej if ((ioflg & IO_NODELOCKED) == 0) 1446 1.174 hannken VOP_UNLOCK(vp); 1447 1.86 perry 1448 1.85 thorpej if (error == 0) 1449 1.85 thorpej *buflen = *buflen - auio.uio_resid; 1450 1.86 perry 1451 1.231 riastrad return error; 1452 1.85 thorpej } 1453 1.85 thorpej 1454 1.85 thorpej /* 1455 1.85 thorpej * XXX Failure mode if partially written? 1456 1.85 thorpej */ 1457 1.85 thorpej int 1458 1.85 thorpej vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, 1459 1.101 christos const char *attrname, size_t buflen, const void *bf, struct lwp *l) 1460 1.85 thorpej { 1461 1.85 thorpej struct uio auio; 1462 1.85 thorpej struct iovec aiov; 1463 1.85 thorpej int error; 1464 1.85 thorpej 1465 1.85 thorpej aiov.iov_len = buflen; 1466 1.88 christos aiov.iov_base = __UNCONST(bf); /* XXXUNCONST kills const */ 1467 1.85 thorpej 1468 1.85 thorpej auio.uio_iov = &aiov; 1469 1.85 thorpej auio.uio_iovcnt = 1; 1470 1.85 thorpej auio.uio_rw = UIO_WRITE; 1471 1.85 thorpej auio.uio_offset = 0; 1472 1.85 thorpej auio.uio_resid = buflen; 1473 1.106 yamt UIO_SETUP_SYSSPACE(&auio); 1474 1.85 thorpej 1475 1.85 thorpej if ((ioflg & IO_NODELOCKED) == 0) { 1476 1.85 thorpej vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1477 1.85 thorpej } 1478 1.85 thorpej 1479 1.208 christos error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NOCRED); 1480 1.85 thorpej 1481 1.85 thorpej if ((ioflg & IO_NODELOCKED) == 0) { 1482 1.174 hannken VOP_UNLOCK(vp); 1483 1.85 thorpej } 1484 1.85 thorpej 1485 1.231 riastrad return error; 1486 1.85 thorpej } 1487 1.85 thorpej 1488 1.85 thorpej int 1489 1.85 thorpej vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, 1490 1.101 christos const char *attrname, struct lwp *l) 1491 1.85 thorpej { 1492 1.85 thorpej int error; 1493 1.85 thorpej 1494 1.85 thorpej if ((ioflg & IO_NODELOCKED) == 0) { 1495 1.85 thorpej vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1496 1.85 thorpej } 1497 1.85 thorpej 1498 1.210 christos error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NOCRED); 1499 1.85 thorpej if (error == EOPNOTSUPP) 1500 1.208 christos error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, 1501 1.208 christos NOCRED); 1502 1.86 perry 1503 1.85 thorpej if ((ioflg & IO_NODELOCKED) == 0) { 1504 1.174 hannken VOP_UNLOCK(vp); 1505 1.85 thorpej } 1506 1.85 thorpej 1507 1.231 riastrad return error; 1508 1.85 thorpej } 1509 1.95 yamt 1510 1.170 pooka int 1511 1.170 pooka vn_fifo_bypass(void *v) 1512 1.170 pooka { 1513 1.170 pooka struct vop_generic_args *ap 
1510 1.170 pooka	int
1511 1.170 pooka	vn_fifo_bypass(void *v)
1512 1.170 pooka	{
1513 1.170 pooka		struct vop_generic_args *ap = v;
1514 1.170 pooka	
1515 1.170 pooka		return VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, v);
1516 1.170 pooka	}
1517 1.202 mlelstv	
1518 1.202 mlelstv	/*
1519 1.202 mlelstv	 * Open block device by device number
1520 1.202 mlelstv	 */
1521 1.202 mlelstv	int
1522 1.202 mlelstv	vn_bdev_open(dev_t dev, struct vnode **vpp, struct lwp *l)
1523 1.202 mlelstv	{
1524 1.202 mlelstv		int error;
1525 1.202 mlelstv	
1526 1.202 mlelstv		if ((error = bdevvp(dev, vpp)) != 0)
1527 1.202 mlelstv			return error;
1528 1.202 mlelstv	
1529 1.226 hannken		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
1530 1.202 mlelstv		if ((error = VOP_OPEN(*vpp, FREAD | FWRITE, l->l_cred)) != 0) {
1531 1.226 hannken			vput(*vpp);
1532 1.202 mlelstv			return error;
1533 1.202 mlelstv		}
1534 1.202 mlelstv		mutex_enter((*vpp)->v_interlock);
1535 1.202 mlelstv		(*vpp)->v_writecount++;
1536 1.202 mlelstv		mutex_exit((*vpp)->v_interlock);
1537 1.226 hannken		VOP_UNLOCK(*vpp);
1538 1.202 mlelstv	
1539 1.202 mlelstv		return 0;
1540 1.202 mlelstv	}
1541 1.202 mlelstv	
1542 1.202 mlelstv	/*
1543 1.202 mlelstv	 * Lookup the provided name in the filesystem.  If the file exists,
1544 1.202 mlelstv	 * is a valid block device, and isn't being used by anyone else,
1545 1.202 mlelstv	 * set *vpp to the file's vnode.
1546 1.202 mlelstv	 */
1547 1.202 mlelstv	int
1548 1.202 mlelstv	vn_bdev_openpath(struct pathbuf *pb, struct vnode **vpp, struct lwp *l)
1549 1.202 mlelstv	{
1550 1.202 mlelstv		struct vnode *vp;
1551 1.202 mlelstv		dev_t dev;
1552 1.202 mlelstv		enum vtype vt;
1553 1.202 mlelstv		int error;
1554 1.202 mlelstv	
1555 1.216 dholland		error = vn_open(NULL, pb, 0, FREAD | FWRITE, 0, &vp, NULL, NULL);
1556 1.216 dholland		if (error != 0)
1557 1.202 mlelstv			return error;
1558 1.202 mlelstv	
1559 1.202 mlelstv		vt = vp->v_type;
1560 1.245 mlelstv		if (vt == VBLK)
1561 1.245 mlelstv			dev = vp->v_rdev;
1562 1.202 mlelstv	
1563 1.202 mlelstv		VOP_UNLOCK(vp);
1564 1.202 mlelstv		(void) vn_close(vp, FREAD | FWRITE, l->l_cred);
1565 1.202 mlelstv	
1566 1.202 mlelstv		if (vt != VBLK)
1567 1.244 riastrad			return SET_ERROR(ENOTBLK);
1568 1.202 mlelstv	
1569 1.202 mlelstv		return vn_bdev_open(dev, vpp, l);
1570 1.202 mlelstv	}
1571 1.224 thorpej	
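/*
 * Illustrative sketch (kept under #if 0, for exposition only): opening a
 * block device by pathname via vn_bdev_openpath() above.  The pathbuf is
 * owned by the caller.  The function name and the ENOMEM handling for a
 * failed pathbuf_create() are assumptions made for this example.
 */
#if 0
static int
example_open_bdev(const char *path, struct vnode **vpp, struct lwp *l)
{
	struct pathbuf *pb;
	int error;

	if ((pb = pathbuf_create(path)) == NULL)
		return SET_ERROR(ENOMEM);
	error = vn_bdev_openpath(pb, vpp, l);
	pathbuf_destroy(pb);

	/*
	 * On success *vpp is an unlocked, referenced block device vnode
	 * opened FREAD|FWRITE; it would later be released with
	 * vn_close(*vpp, FREAD | FWRITE, l->l_cred).
	 */
	return error;
}
#endif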
1572 1.224 thorpej	static long
1573 1.224 thorpej	vn_knote_to_interest(const struct knote *kn)
1574 1.224 thorpej	{
1575 1.243 riastrad	
1576 1.224 thorpej		switch (kn->kn_filter) {
1577 1.224 thorpej		case EVFILT_READ:
1578 1.224 thorpej			/*
1579 1.224 thorpej			 * Writing to the file or changing its attributes can
1580 1.224 thorpej			 * set the file size, which impacts the readability
1581 1.224 thorpej			 * filter.
1582 1.224 thorpej			 *
1583 1.224 thorpej			 * (No need to set NOTE_EXTEND here; it's only ever
1584 1.224 thorpej			 * sent with other hints; see vnode_if.c.)
1585 1.224 thorpej			 */
1586 1.224 thorpej			return NOTE_WRITE | NOTE_ATTRIB;
1587 1.224 thorpej	
1588 1.224 thorpej		case EVFILT_VNODE:
1589 1.224 thorpej			return kn->kn_sfflags;
1590 1.224 thorpej	
1591 1.224 thorpej		case EVFILT_WRITE:
1592 1.224 thorpej		default:
1593 1.224 thorpej			return 0;
1594 1.224 thorpej		}
1595 1.224 thorpej	}
1596 1.224 thorpej	
1597 1.224 thorpej	void
1598 1.224 thorpej	vn_knote_attach(struct vnode *vp, struct knote *kn)
1599 1.224 thorpej	{
1600 1.234 thorpej		struct vnode_klist *vk = vp->v_klist;
1601 1.224 thorpej		long interest = 0;
1602 1.224 thorpej	
1603 1.224 thorpej		/*
1604 1.234 thorpej		 * In the case of layered / stacked file systems, knotes
1605 1.234 thorpej		 * should only ever be associated with the base vnode.
1606 1.234 thorpej		 */
1607 1.234 thorpej		KASSERT(kn->kn_hook == vp);
1608 1.234 thorpej		KASSERT(vp->v_klist == &VNODE_TO_VIMPL(vp)->vi_klist);
1609 1.234 thorpej	
1610 1.234 thorpej		/*
1611 1.224 thorpej		 * We maintain a bitmask of the kevents that there is interest in,
1612 1.224 thorpej		 * to minimize the impact of having watchers.  It's silly to have
1613 1.224 thorpej		 * to traverse vn_klist every time a read or write happens simply
1614 1.224 thorpej		 * because there is someone interested in knowing when the file
1615 1.224 thorpej		 * is deleted, for example.
1616 1.224 thorpej		 */
1617 1.224 thorpej	
1618 1.224 thorpej		mutex_enter(vp->v_interlock);
1619 1.234 thorpej		SLIST_INSERT_HEAD(&vk->vk_klist, kn, kn_selnext);
1620 1.234 thorpej		SLIST_FOREACH(kn, &vk->vk_klist, kn_selnext) {
1621 1.224 thorpej			interest |= vn_knote_to_interest(kn);
1622 1.224 thorpej		}
1623 1.234 thorpej		vk->vk_interest = interest;
1624 1.224 thorpej		mutex_exit(vp->v_interlock);
1625 1.224 thorpej	}
1626 1.224 thorpej	
1627 1.224 thorpej	void
1628 1.224 thorpej	vn_knote_detach(struct vnode *vp, struct knote *kn)
1629 1.224 thorpej	{
1630 1.234 thorpej		struct vnode_klist *vk = vp->v_klist;
1631 1.234 thorpej		long interest = 0;
1632 1.234 thorpej	
1633 1.234 thorpej		/* See above. */
1634 1.234 thorpej		KASSERT(kn->kn_hook == vp);
1635 1.234 thorpej		KASSERT(vp->v_klist == &VNODE_TO_VIMPL(vp)->vi_klist);
1636 1.224 thorpej	
1637 1.224 thorpej		/*
1638 1.228 andvar		 * We special case removing the head of the list, because:
1639 1.224 thorpej		 *
1640 1.224 thorpej		 * 1. It's extremely likely that we're detaching the only
1641 1.224 thorpej		 *    knote.
1642 1.224 thorpej		 *
1643 1.224 thorpej		 * 2. We're already traversing the whole list, so we don't
1644 1.224 thorpej		 *    want to use the generic SLIST_REMOVE() which would
1645 1.224 thorpej		 *    traverse it *again*.
1646 1.224 thorpej		 */
1647 1.224 thorpej	
1648 1.224 thorpej		mutex_enter(vp->v_interlock);
1649 1.234 thorpej		if (__predict_true(kn == SLIST_FIRST(&vk->vk_klist))) {
1650 1.234 thorpej			SLIST_REMOVE_HEAD(&vk->vk_klist, kn_selnext);
1651 1.234 thorpej			SLIST_FOREACH(kn, &vk->vk_klist, kn_selnext) {
1652 1.224 thorpej				interest |= vn_knote_to_interest(kn);
1653 1.224 thorpej			}
1654 1.234 thorpej			vk->vk_interest = interest;
1655 1.224 thorpej		} else {
1656 1.224 thorpej			struct knote *thiskn, *nextkn, *prevkn = NULL;
1657 1.224 thorpej	
1658 1.234 thorpej			SLIST_FOREACH_SAFE(thiskn, &vk->vk_klist, kn_selnext, nextkn) {
1659 1.224 thorpej				if (thiskn == kn) {
1660 1.224 thorpej					KASSERT(kn != NULL);
1661 1.224 thorpej					KASSERT(prevkn != NULL);
1662 1.224 thorpej					SLIST_REMOVE_AFTER(prevkn, kn_selnext);
1663 1.224 thorpej					kn = NULL;
1664 1.224 thorpej				} else {
1665 1.224 thorpej					interest |= vn_knote_to_interest(thiskn);
1666 1.224 thorpej					prevkn = thiskn;
1667 1.224 thorpej				}
1668 1.224 thorpej			}
1669 1.234 thorpej			vk->vk_interest = interest;
1670 1.224 thorpej		}
1671 1.224 thorpej		mutex_exit(vp->v_interlock);
1672 1.224 thorpej	}
1673
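/*
 * Illustrative sketch (kept under #if 0, for exposition only): how a
 * notification path could consult the interest bitmask maintained by
 * vn_knote_attach()/vn_knote_detach() above before walking the knote
 * list.  The helper name, the direct call to knote(), and the locking
 * shown here are assumptions for illustration, not the kernel's actual
 * notification code.
 */
#if 0
static void
example_vn_notify(struct vnode *vp, long hint)
{
	struct vnode_klist *vk = vp->v_klist;

	/* Cheap test first: skip the list walk if nobody cares. */
	if ((vk->vk_interest & hint) == 0)
		return;

	mutex_enter(vp->v_interlock);
	knote(&vk->vk_klist, hint);
	mutex_exit(vp->v_interlock);
}
#endif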