1 1.1 haad /* 2 1.1 haad * CDDL HEADER START 3 1.1 haad * 4 1.1 haad * The contents of this file are subject to the terms of the 5 1.1 haad * Common Development and Distribution License (the "License"). 6 1.1 haad * You may not use this file except in compliance with the License. 7 1.1 haad * 8 1.1 haad * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 1.1 haad * or http://www.opensolaris.org/os/licensing. 10 1.1 haad * See the License for the specific language governing permissions 11 1.1 haad * and limitations under the License. 12 1.1 haad * 13 1.1 haad * When distributing Covered Code, include this CDDL HEADER in each 14 1.1 haad * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 1.1 haad * If applicable, add the following below this CDDL HEADER, with the 16 1.1 haad * fields enclosed by brackets "[]" replaced with your own identifying 17 1.1 haad * information: Portions Copyright [yyyy] [name of copyright owner] 18 1.1 haad * 19 1.1 haad * CDDL HEADER END 20 1.1 haad */ 21 1.1 haad /* 22 1.27 chs * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 1.27 chs * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 1.27 chs * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
25 1.27 chs * Copyright (c) 2014 Integros [integros.com] 26 1.1 haad */ 27 1.1 haad 28 1.1 haad /* Portions Copyright 2007 Jeremy Teo */ 29 1.27 chs /* Portions Copyright 2010 Robert Milkowski */ 30 1.1 haad 31 1.1 haad #include <sys/types.h> 32 1.1 haad #include <sys/param.h> 33 1.1 haad #include <sys/time.h> 34 1.1 haad #include <sys/systm.h> 35 1.1 haad #include <sys/sysmacros.h> 36 1.1 haad #include <sys/resource.h> 37 1.1 haad #include <sys/vfs.h> 38 1.27 chs #include <sys/vm.h> 39 1.1 haad #include <sys/vnode.h> 40 1.1 haad #include <sys/file.h> 41 1.1 haad #include <sys/stat.h> 42 1.1 haad #include <sys/kmem.h> 43 1.1 haad #include <sys/taskq.h> 44 1.1 haad #include <sys/uio.h> 45 1.1 haad #include <sys/atomic.h> 46 1.2 haad #include <sys/namei.h> 47 1.1 haad #include <sys/mman.h> 48 1.1 haad #include <sys/cmn_err.h> 49 1.1 haad #include <sys/errno.h> 50 1.1 haad #include <sys/unistd.h> 51 1.1 haad #include <sys/zfs_dir.h> 52 1.1 haad #include <sys/zfs_ioctl.h> 53 1.1 haad #include <sys/fs/zfs.h> 54 1.1 haad #include <sys/dmu.h> 55 1.27 chs #include <sys/dmu_objset.h> 56 1.1 haad #include <sys/spa.h> 57 1.1 haad #include <sys/txg.h> 58 1.1 haad #include <sys/dbuf.h> 59 1.1 haad #include <sys/zap.h> 60 1.27 chs #include <sys/sa.h> 61 1.1 haad #include <sys/dirent.h> 62 1.1 haad #include <sys/policy.h> 63 1.1 haad #include <sys/sunddi.h> 64 1.1 haad #include <sys/filio.h> 65 1.27 chs #include <sys/sid.h> 66 1.1 haad #include <sys/zfs_ctldir.h> 67 1.1 haad #include <sys/zfs_fuid.h> 68 1.27 chs #include <sys/zfs_sa.h> 69 1.1 haad #include <sys/dnlc.h> 70 1.1 haad #include <sys/zfs_rlock.h> 71 1.2 haad #include <sys/buf.h> 72 1.2 haad #include <sys/sched.h> 73 1.2 haad #include <sys/acl.h> 74 1.27 chs #include <sys/extdirent.h> 75 1.27 chs 76 1.27 chs #ifdef __FreeBSD__ 77 1.27 chs #include <sys/kidmap.h> 78 1.27 chs #include <sys/bio.h> 79 1.27 chs #include <vm/vm_param.h> 80 1.27 chs #endif 81 1.2 haad 82 1.2 haad #ifdef __NetBSD__ 83 1.28 riastrad #include 
<dev/mm.h> 84 1.48 hannken #include <miscfs/fifofs/fifo.h> 85 1.2 haad #include <miscfs/genfs/genfs.h> 86 1.27 chs #include <miscfs/genfs/genfs_node.h> 87 1.27 chs #include <uvm/uvm_extern.h> 88 1.41 hannken #include <sys/fstrans.h> 89 1.45 hannken #include <sys/malloc.h> 90 1.27 chs 91 1.27 chs uint_t zfs_putpage_key; 92 1.2 haad #endif 93 1.1 haad 94 1.1 haad /* 95 1.1 haad * Programming rules. 96 1.1 haad * 97 1.1 haad * Each vnode op performs some logical unit of work. To do this, the ZPL must 98 1.1 haad * properly lock its in-core state, create a DMU transaction, do the work, 99 1.1 haad * record this work in the intent log (ZIL), commit the DMU transaction, 100 1.1 haad * and wait for the intent log to commit if it is a synchronous operation. 101 1.1 haad * Moreover, the vnode ops must work in both normal and log replay context. 102 1.1 haad * The ordering of events is important to avoid deadlocks and references 103 1.1 haad * to freed memory. The example below illustrates the following Big Rules: 104 1.1 haad * 105 1.27 chs * (1) A check must be made in each zfs thread for a mounted file system. 106 1.1 haad * This is done avoiding races using ZFS_ENTER(zfsvfs). 107 1.27 chs * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 108 1.27 chs * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 109 1.27 chs * can return EIO from the calling function. 110 1.1 haad * 111 1.1 haad * (2) VN_RELE() should always be the last thing except for zil_commit() 112 1.1 haad * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 113 1.1 haad * First, if it's the last reference, the vnode/znode 114 1.1 haad * can be freed, so the zp may point to freed memory. Second, the last 115 1.1 haad * reference will call zfs_zinactive(), which may induce a lot of work -- 116 1.1 haad * pushing cached pages (which acquires range locks) and syncing out 117 1.1 haad * cached atime changes. 
Third, zfs_zinactive() may require a new tx, 118 1.1 haad * which could deadlock the system if you were already holding one. 119 1.2 haad * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 120 1.1 haad * 121 1.1 haad * (3) All range locks must be grabbed before calling dmu_tx_assign(), 122 1.1 haad * as they can span dmu_tx_assign() calls. 123 1.1 haad * 124 1.27 chs * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 125 1.27 chs * dmu_tx_assign(). This is critical because we don't want to block 126 1.27 chs * while holding locks. 127 1.27 chs * 128 1.27 chs * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 129 1.27 chs * reduces lock contention and CPU usage when we must wait (note that if 130 1.27 chs * throughput is constrained by the storage, nearly every transaction 131 1.27 chs * must wait). 132 1.27 chs * 133 1.27 chs * Note, in particular, that if a lock is sometimes acquired before 134 1.27 chs * the tx assigns, and sometimes after (e.g. z_lock), then failing 135 1.27 chs * to use a non-blocking assign can deadlock the system. The scenario: 136 1.1 haad * 137 1.1 haad * Thread A has grabbed a lock before calling dmu_tx_assign(). 138 1.1 haad * Thread B is in an already-assigned tx, and blocks for this lock. 139 1.1 haad * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 140 1.1 haad * forever, because the previous txg can't quiesce until B's tx commits. 141 1.1 haad * 142 1.1 haad * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 143 1.27 chs * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 144 1.27 chs * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 145 1.27 chs * to indicate that this operation has already called dmu_tx_wait(). 146 1.27 chs * This will ensure that we don't retry forever, waiting a short bit 147 1.27 chs * each time. 
148 1.1 haad * 149 1.1 haad * (5) If the operation succeeded, generate the intent log entry for it 150 1.1 haad * before dropping locks. This ensures that the ordering of events 151 1.1 haad * in the intent log matches the order in which they actually occurred. 152 1.27 chs * During ZIL replay the zfs_log_* functions will update the sequence 153 1.4 haad * number to indicate the zil transaction has replayed. 154 1.1 haad * 155 1.1 haad * (6) At the end of each vnode op, the DMU tx must always commit, 156 1.1 haad * regardless of whether there were any errors. 157 1.1 haad * 158 1.27 chs * (7) After dropping all locks, invoke zil_commit(zilog, foid) 159 1.1 haad * to ensure that synchronous semantics are provided when necessary. 160 1.1 haad * 161 1.1 haad * In general, this is how things should be ordered in each vnode op: 162 1.1 haad * 163 1.1 haad * ZFS_ENTER(zfsvfs); // exit if unmounted 164 1.1 haad * top: 165 1.27 chs * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 166 1.1 haad * rw_enter(...); // grab any other locks you need 167 1.1 haad * tx = dmu_tx_create(...); // get DMU tx 168 1.1 haad * dmu_tx_hold_*(); // hold each object you might modify 169 1.27 chs * error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 170 1.1 haad * if (error) { 171 1.1 haad * rw_exit(...); // drop locks 172 1.27 chs * zfs_dirent_unlock(dl); // unlock directory entry 173 1.1 haad * VN_RELE(...); // release held vnodes 174 1.4 haad * if (error == ERESTART) { 175 1.27 chs * waited = B_TRUE; 176 1.1 haad * dmu_tx_wait(tx); 177 1.1 haad * dmu_tx_abort(tx); 178 1.1 haad * goto top; 179 1.1 haad * } 180 1.1 haad * dmu_tx_abort(tx); // abort DMU tx 181 1.1 haad * ZFS_EXIT(zfsvfs); // finished in zfs 182 1.1 haad * return (error); // really out of space 183 1.1 haad * } 184 1.1 haad * error = do_real_work(); // do whatever this VOP does 185 1.1 haad * if (error == 0) 186 1.1 haad * zfs_log_*(...); // on success, make ZIL entry 187 1.1 haad * dmu_tx_commit(tx); // commit DMU tx -- error or not 188 1.1 haad * rw_exit(...); // drop locks 189 1.27 chs * zfs_dirent_unlock(dl); // unlock directory entry 190 1.1 haad * VN_RELE(...); // release held vnodes 191 1.27 chs * zil_commit(zilog, foid); // synchronous when necessary 192 1.1 haad * ZFS_EXIT(zfsvfs); // finished in zfs 193 1.1 haad * return (error); // done, report error 194 1.1 haad */ 195 1.1 haad 196 1.1 haad /* ARGSUSED */ 197 1.1 haad static int 198 1.1 haad zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 199 1.1 haad { 200 1.1 haad znode_t *zp = VTOZ(*vpp); 201 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 202 1.1 haad 203 1.27 chs ZFS_ENTER(zfsvfs); 204 1.27 chs ZFS_VERIFY_ZP(zp); 205 1.27 chs 206 1.27 chs if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 207 1.1 haad ((flag & FAPPEND) == 0)) { 208 1.27 chs ZFS_EXIT(zfsvfs); 209 1.27 chs return (SET_ERROR(EPERM)); 210 1.1 haad } 211 1.1 haad 212 1.1 haad if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 213 1.1 haad ZTOV(zp)->v_type == VREG && 214 1.27 chs !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 215 1.27 chs if (fs_vscan(*vpp, cr, 0) != 0) { 216 1.27 chs ZFS_EXIT(zfsvfs); 217 1.27 chs return (SET_ERROR(EACCES)); 218 1.27 chs } 
219 1.27 chs } 220 1.1 haad 221 1.99 yamt /* 222 1.99 yamt * Keep a count of the synchronous opens in the znode. On first 223 1.99 yamt * synchronous open we must convert all previous async transactions 224 1.99 yamt * into sync to keep correct ordering. 225 1.99 yamt */ 226 1.99 yamt if (flag & (FSYNC | FDSYNC)) { 227 1.99 yamt if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1) 228 1.99 yamt zil_async_to_sync(zfsvfs->z_log, zp->z_id); 229 1.99 yamt } 230 1.1 haad 231 1.27 chs ZFS_EXIT(zfsvfs); 232 1.1 haad return (0); 233 1.1 haad } 234 1.1 haad 235 1.1 haad /* ARGSUSED */ 236 1.1 haad static int 237 1.1 haad zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 238 1.1 haad caller_context_t *ct) 239 1.1 haad { 240 1.1 haad znode_t *zp = VTOZ(vp); 241 1.4 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 242 1.1 haad 243 1.1 haad /* 244 1.1 haad * Clean up any locks held by this process on the vp. 245 1.1 haad */ 246 1.1 haad cleanlocks(vp, ddi_get_pid(), 0); 247 1.1 haad cleanshares(vp, ddi_get_pid()); 248 1.1 haad 249 1.4 haad ZFS_ENTER(zfsvfs); 250 1.4 haad ZFS_VERIFY_ZP(zp); 251 1.4 haad 252 1.4 haad /* Decrement the synchronous opens in the znode */ 253 1.4 haad if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 254 1.4 haad atomic_dec_32(&zp->z_sync_cnt); 255 1.4 haad 256 1.1 haad if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 257 1.1 haad ZTOV(zp)->v_type == VREG && 258 1.27 chs !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 259 1.1 haad VERIFY(fs_vscan(vp, cr, 1) == 0); 260 1.1 haad 261 1.18 riastrad ZFS_EXIT(zfsvfs); 262 1.1 haad return (0); 263 1.1 haad } 264 1.1 haad 265 1.1 haad /* 266 1.1 haad * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 267 1.1 haad * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
268 1.1 haad */ 269 1.1 haad static int 270 1.2 haad zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 271 1.1 haad { 272 1.1 haad znode_t *zp = VTOZ(vp); 273 1.1 haad uint64_t noff = (uint64_t)*off; /* new offset */ 274 1.1 haad uint64_t file_sz; 275 1.1 haad int error; 276 1.1 haad boolean_t hole; 277 1.1 haad 278 1.27 chs file_sz = zp->z_size; 279 1.1 haad if (noff >= file_sz) { 280 1.27 chs return (SET_ERROR(ENXIO)); 281 1.1 haad } 282 1.1 haad 283 1.1 haad if (cmd == _FIO_SEEK_HOLE) 284 1.1 haad hole = B_TRUE; 285 1.1 haad else 286 1.1 haad hole = B_FALSE; 287 1.1 haad 288 1.1 haad error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 289 1.1 haad 290 1.27 chs if (error == ESRCH) 291 1.27 chs return (SET_ERROR(ENXIO)); 292 1.27 chs 293 1.27 chs /* 294 1.27 chs * We could find a hole that begins after the logical end-of-file, 295 1.27 chs * because dmu_offset_next() only works on whole blocks. If the 296 1.27 chs * EOF falls mid-block, then indicate that the "virtual hole" 297 1.27 chs * at the end of the file begins at the logical EOF, rather than 298 1.27 chs * at the end of the last block. 299 1.27 chs */ 300 1.27 chs if (noff > file_sz) { 301 1.27 chs ASSERT(hole); 302 1.27 chs noff = file_sz; 303 1.1 haad } 304 1.1 haad 305 1.1 haad if (noff < *off) 306 1.1 haad return (error); 307 1.1 haad *off = noff; 308 1.1 haad return (error); 309 1.1 haad } 310 1.1 haad 311 1.27 chs /* ARGSUSED */ 312 1.1 haad static int 313 1.2 haad zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 314 1.1 haad int *rvalp, caller_context_t *ct) 315 1.1 haad { 316 1.1 haad offset_t off; 317 1.27 chs offset_t ndata; 318 1.27 chs dmu_object_info_t doi; 319 1.1 haad int error; 320 1.1 haad zfsvfs_t *zfsvfs; 321 1.1 haad znode_t *zp; 322 1.27 chs 323 1.1 haad switch (com) { 324 1.1 haad case _FIOFFS: 325 1.27 chs { 326 1.2 haad return (0); 327 1.27 chs 328 1.1 haad /* 329 1.1 haad * The following two ioctls are used by bfu. 
Faking out, 330 1.1 haad * necessary to avoid bfu errors. 331 1.1 haad */ 332 1.27 chs } 333 1.1 haad case _FIOGDIO: 334 1.1 haad case _FIOSDIO: 335 1.27 chs { 336 1.1 haad return (0); 337 1.27 chs } 338 1.27 chs 339 1.1 haad case _FIO_SEEK_DATA: 340 1.1 haad case _FIO_SEEK_HOLE: 341 1.27 chs { 342 1.27 chs #ifdef illumos 343 1.1 haad if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 344 1.27 chs return (SET_ERROR(EFAULT)); 345 1.27 chs #else 346 1.27 chs off = *(offset_t *)data; 347 1.27 chs #endif 348 1.1 haad zp = VTOZ(vp); 349 1.1 haad zfsvfs = zp->z_zfsvfs; 350 1.1 haad ZFS_ENTER(zfsvfs); 351 1.1 haad ZFS_VERIFY_ZP(zp); 352 1.27 chs 353 1.1 haad /* offset parameter is in/out */ 354 1.1 haad error = zfs_holey(vp, com, &off); 355 1.1 haad ZFS_EXIT(zfsvfs); 356 1.1 haad if (error) 357 1.1 haad return (error); 358 1.27 chs #ifdef illumos 359 1.1 haad if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 360 1.27 chs return (SET_ERROR(EFAULT)); 361 1.27 chs #else 362 1.27 chs *(offset_t *)data = off; 363 1.27 chs #endif 364 1.27 chs return (0); 365 1.27 chs } 366 1.27 chs #ifdef illumos 367 1.27 chs case _FIO_COUNT_FILLED: 368 1.27 chs { 369 1.27 chs /* 370 1.27 chs * _FIO_COUNT_FILLED adds a new ioctl command which 371 1.27 chs * exposes the number of filled blocks in a 372 1.27 chs * ZFS object. 373 1.27 chs */ 374 1.27 chs zp = VTOZ(vp); 375 1.27 chs zfsvfs = zp->z_zfsvfs; 376 1.27 chs ZFS_ENTER(zfsvfs); 377 1.27 chs ZFS_VERIFY_ZP(zp); 378 1.27 chs 379 1.27 chs /* 380 1.27 chs * Wait for all dirty blocks for this object 381 1.27 chs * to get synced out to disk, and the DMU info 382 1.27 chs * updated. 383 1.27 chs */ 384 1.27 chs error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 385 1.27 chs if (error) { 386 1.27 chs ZFS_EXIT(zfsvfs); 387 1.27 chs return (error); 388 1.27 chs } 389 1.27 chs 390 1.27 chs /* 391 1.27 chs * Retrieve fill count from DMU object. 
392 1.27 chs */ 393 1.27 chs error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 394 1.27 chs if (error) { 395 1.27 chs ZFS_EXIT(zfsvfs); 396 1.27 chs return (error); 397 1.27 chs } 398 1.27 chs 399 1.27 chs ndata = doi.doi_fill_count; 400 1.27 chs 401 1.27 chs ZFS_EXIT(zfsvfs); 402 1.27 chs if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 403 1.27 chs return (SET_ERROR(EFAULT)); 404 1.1 haad return (0); 405 1.27 chs } 406 1.2 haad #endif 407 1.1 haad } 408 1.27 chs return (SET_ERROR(ENOTTY)); 409 1.27 chs } 410 1.27 chs 411 1.27 chs #ifdef __FreeBSD__ 412 1.27 chs static vm_page_t 413 1.27 chs page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 414 1.27 chs { 415 1.27 chs vm_object_t obj; 416 1.27 chs vm_page_t pp; 417 1.27 chs int64_t end; 418 1.27 chs 419 1.27 chs /* 420 1.27 chs * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 421 1.27 chs * aligned boundaries, if the range is not aligned. As a result a 422 1.27 chs * DEV_BSIZE subrange with partially dirty data may get marked as clean. 423 1.27 chs * It may happen that all DEV_BSIZE subranges are marked clean and thus 424 1.27 chs * the whole page would be considred clean despite have some dirty data. 425 1.27 chs * For this reason we should shrink the range to DEV_BSIZE aligned 426 1.27 chs * boundaries before calling vm_page_clear_dirty. 427 1.27 chs */ 428 1.27 chs end = rounddown2(off + nbytes, DEV_BSIZE); 429 1.27 chs off = roundup2(off, DEV_BSIZE); 430 1.27 chs nbytes = end - off; 431 1.27 chs 432 1.27 chs obj = vp->v_object; 433 1.27 chs zfs_vmobject_assert_wlocked(obj); 434 1.27 chs 435 1.27 chs for (;;) { 436 1.27 chs if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 437 1.27 chs pp->valid) { 438 1.27 chs if (vm_page_xbusied(pp)) { 439 1.27 chs /* 440 1.27 chs * Reference the page before unlocking and 441 1.27 chs * sleeping so that the page daemon is less 442 1.27 chs * likely to reclaim it. 
443 1.27 chs */ 444 1.27 chs vm_page_reference(pp); 445 1.27 chs vm_page_lock(pp); 446 1.27 chs zfs_vmobject_wunlock(obj); 447 1.27 chs vm_page_busy_sleep(pp, "zfsmwb", true); 448 1.27 chs zfs_vmobject_wlock(obj); 449 1.27 chs continue; 450 1.27 chs } 451 1.27 chs vm_page_sbusy(pp); 452 1.27 chs } else if (pp != NULL) { 453 1.27 chs ASSERT(!pp->valid); 454 1.27 chs pp = NULL; 455 1.27 chs } 456 1.27 chs 457 1.27 chs if (pp != NULL) { 458 1.27 chs ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 459 1.27 chs vm_object_pip_add(obj, 1); 460 1.27 chs pmap_remove_write(pp); 461 1.27 chs if (nbytes != 0) 462 1.27 chs vm_page_clear_dirty(pp, off, nbytes); 463 1.27 chs } 464 1.27 chs break; 465 1.27 chs } 466 1.27 chs return (pp); 467 1.27 chs } 468 1.27 chs 469 1.27 chs static void 470 1.27 chs page_unbusy(vm_page_t pp) 471 1.27 chs { 472 1.27 chs 473 1.27 chs vm_page_sunbusy(pp); 474 1.27 chs vm_object_pip_subtract(pp->object, 1); 475 1.27 chs } 476 1.27 chs 477 1.27 chs static vm_page_t 478 1.27 chs page_hold(vnode_t *vp, int64_t start) 479 1.27 chs { 480 1.27 chs vm_object_t obj; 481 1.27 chs vm_page_t pp; 482 1.27 chs 483 1.27 chs obj = vp->v_object; 484 1.27 chs zfs_vmobject_assert_wlocked(obj); 485 1.27 chs 486 1.27 chs for (;;) { 487 1.27 chs if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 488 1.27 chs pp->valid) { 489 1.27 chs if (vm_page_xbusied(pp)) { 490 1.27 chs /* 491 1.27 chs * Reference the page before unlocking and 492 1.27 chs * sleeping so that the page daemon is less 493 1.27 chs * likely to reclaim it. 
494 1.27 chs */ 495 1.27 chs vm_page_reference(pp); 496 1.27 chs vm_page_lock(pp); 497 1.27 chs zfs_vmobject_wunlock(obj); 498 1.27 chs vm_page_busy_sleep(pp, "zfsmwb", true); 499 1.27 chs zfs_vmobject_wlock(obj); 500 1.27 chs continue; 501 1.27 chs } 502 1.27 chs 503 1.27 chs ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 504 1.27 chs vm_page_lock(pp); 505 1.27 chs vm_page_hold(pp); 506 1.27 chs vm_page_unlock(pp); 507 1.27 chs 508 1.27 chs } else 509 1.27 chs pp = NULL; 510 1.27 chs break; 511 1.27 chs } 512 1.27 chs return (pp); 513 1.27 chs } 514 1.27 chs 515 1.27 chs static void 516 1.27 chs page_unhold(vm_page_t pp) 517 1.27 chs { 518 1.27 chs 519 1.27 chs vm_page_lock(pp); 520 1.27 chs vm_page_unhold(pp); 521 1.27 chs vm_page_unlock(pp); 522 1.1 haad } 523 1.1 haad 524 1.1 haad /* 525 1.1 haad * When a file is memory mapped, we must keep the IO data synchronized 526 1.1 haad * between the DMU cache and the memory mapped pages. What this means: 527 1.1 haad * 528 1.1 haad * On Write: If we find a memory mapped page, we write to *both* 529 1.1 haad * the page and the dmu buffer. 
530 1.1 haad */ 531 1.4 haad static void 532 1.27 chs update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 533 1.27 chs int segflg, dmu_tx_t *tx) 534 1.1 haad { 535 1.27 chs vm_object_t obj; 536 1.27 chs struct sf_buf *sf; 537 1.27 chs caddr_t va; 538 1.27 chs int off; 539 1.27 chs 540 1.27 chs ASSERT(segflg != UIO_NOCOPY); 541 1.27 chs ASSERT(vp->v_mount != NULL); 542 1.27 chs obj = vp->v_object; 543 1.27 chs ASSERT(obj != NULL); 544 1.1 haad 545 1.1 haad off = start & PAGEOFFSET; 546 1.27 chs zfs_vmobject_wlock(obj); 547 1.1 haad for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 548 1.27 chs vm_page_t pp; 549 1.27 chs int nbytes = imin(PAGESIZE - off, len); 550 1.27 chs 551 1.27 chs if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 552 1.27 chs zfs_vmobject_wunlock(obj); 553 1.1 haad 554 1.27 chs va = zfs_map_page(pp, &sf); 555 1.27 chs (void) dmu_read(os, oid, start+off, nbytes, 556 1.27 chs va+off, DMU_READ_PREFETCH);; 557 1.27 chs zfs_unmap_page(sf); 558 1.1 haad 559 1.27 chs zfs_vmobject_wlock(obj); 560 1.27 chs page_unbusy(pp); 561 1.1 haad } 562 1.4 haad len -= nbytes; 563 1.1 haad off = 0; 564 1.1 haad } 565 1.27 chs vm_object_pip_wakeupn(obj, 0); 566 1.27 chs zfs_vmobject_wunlock(obj); 567 1.27 chs } 568 1.27 chs 569 1.27 chs /* 570 1.27 chs * Read with UIO_NOCOPY flag means that sendfile(2) requests 571 1.27 chs * ZFS to populate a range of page cache pages with data. 572 1.27 chs * 573 1.27 chs * NOTE: this function could be optimized to pre-allocate 574 1.27 chs * all pages in advance, drain exclusive busy on all of them, 575 1.27 chs * map them into contiguous KVA region and populate them 576 1.27 chs * in one single dmu_read() call. 
577 1.27 chs */ 578 1.27 chs static int 579 1.27 chs mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 580 1.27 chs { 581 1.27 chs znode_t *zp = VTOZ(vp); 582 1.27 chs objset_t *os = zp->z_zfsvfs->z_os; 583 1.27 chs struct sf_buf *sf; 584 1.27 chs vm_object_t obj; 585 1.27 chs vm_page_t pp; 586 1.27 chs int64_t start; 587 1.27 chs caddr_t va; 588 1.27 chs int len = nbytes; 589 1.27 chs int off; 590 1.27 chs int error = 0; 591 1.4 haad 592 1.27 chs ASSERT(uio->uio_segflg == UIO_NOCOPY); 593 1.27 chs ASSERT(vp->v_mount != NULL); 594 1.27 chs obj = vp->v_object; 595 1.27 chs ASSERT(obj != NULL); 596 1.27 chs ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 597 1.27 chs 598 1.27 chs zfs_vmobject_wlock(obj); 599 1.27 chs for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 600 1.27 chs int bytes = MIN(PAGESIZE, len); 601 1.27 chs 602 1.27 chs pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 603 1.27 chs VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 604 1.27 chs if (pp->valid == 0) { 605 1.27 chs zfs_vmobject_wunlock(obj); 606 1.27 chs va = zfs_map_page(pp, &sf); 607 1.27 chs error = dmu_read(os, zp->z_id, start, bytes, va, 608 1.27 chs DMU_READ_PREFETCH); 609 1.27 chs if (bytes != PAGESIZE && error == 0) 610 1.27 chs bzero(va + bytes, PAGESIZE - bytes); 611 1.27 chs zfs_unmap_page(sf); 612 1.27 chs zfs_vmobject_wlock(obj); 613 1.27 chs vm_page_sunbusy(pp); 614 1.27 chs vm_page_lock(pp); 615 1.27 chs if (error) { 616 1.27 chs if (pp->wire_count == 0 && pp->valid == 0 && 617 1.27 chs !vm_page_busied(pp)) 618 1.27 chs vm_page_free(pp); 619 1.27 chs } else { 620 1.27 chs pp->valid = VM_PAGE_BITS_ALL; 621 1.27 chs vm_page_activate(pp); 622 1.27 chs } 623 1.27 chs vm_page_unlock(pp); 624 1.27 chs } else { 625 1.27 chs ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 626 1.27 chs vm_page_sunbusy(pp); 627 1.27 chs } 628 1.27 chs if (error) 629 1.27 chs break; 630 1.27 chs uio->uio_resid -= bytes; 631 1.27 chs uio->uio_offset += bytes; 632 1.27 chs len -= bytes; 633 1.27 chs 
} 634 1.27 chs zfs_vmobject_wunlock(obj); 635 1.1 haad return (error); 636 1.1 haad } 637 1.1 haad 638 1.1 haad /* 639 1.1 haad * When a file is memory mapped, we must keep the IO data synchronized 640 1.1 haad * between the DMU cache and the memory mapped pages. What this means: 641 1.1 haad * 642 1.1 haad * On Read: We "read" preferentially from memory mapped pages, 643 1.1 haad * else we default from the dmu buffer. 644 1.1 haad * 645 1.1 haad * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 646 1.27 chs * the file is memory mapped. 647 1.1 haad */ 648 1.1 haad static int 649 1.1 haad mappedread(vnode_t *vp, int nbytes, uio_t *uio) 650 1.1 haad { 651 1.1 haad znode_t *zp = VTOZ(vp); 652 1.2 haad vm_object_t obj; 653 1.27 chs int64_t start; 654 1.2 haad caddr_t va; 655 1.1 haad int len = nbytes; 656 1.27 chs int off; 657 1.1 haad int error = 0; 658 1.2 haad 659 1.2 haad ASSERT(vp->v_mount != NULL); 660 1.2 haad obj = vp->v_object; 661 1.2 haad ASSERT(obj != NULL); 662 1.1 haad 663 1.1 haad start = uio->uio_loffset; 664 1.1 haad off = start & PAGEOFFSET; 665 1.27 chs zfs_vmobject_wlock(obj); 666 1.27 chs for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 667 1.27 chs vm_page_t pp; 668 1.27 chs uint64_t bytes = MIN(PAGESIZE - off, len); 669 1.27 chs 670 1.27 chs if (pp = page_hold(vp, start)) { 671 1.27 chs struct sf_buf *sf; 672 1.27 chs caddr_t va; 673 1.27 chs 674 1.27 chs zfs_vmobject_wunlock(obj); 675 1.27 chs va = zfs_map_page(pp, &sf); 676 1.27 chs #ifdef illumos 677 1.27 chs error = uiomove(va + off, bytes, UIO_READ, uio); 678 1.27 chs #else 679 1.27 chs error = vn_io_fault_uiomove(va + off, bytes, uio); 680 1.27 chs #endif 681 1.27 chs zfs_unmap_page(sf); 682 1.27 chs zfs_vmobject_wlock(obj); 683 1.27 chs page_unhold(pp); 684 1.27 chs } else { 685 1.27 chs zfs_vmobject_wunlock(obj); 686 1.27 chs error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 687 1.27 chs uio, bytes); 688 1.27 chs zfs_vmobject_wlock(obj); 689 1.27 chs } 690 1.27 
chs len -= bytes; 691 1.27 chs off = 0; 692 1.27 chs if (error) 693 1.27 chs break; 694 1.27 chs } 695 1.27 chs zfs_vmobject_wunlock(obj); 696 1.27 chs return (error); 697 1.27 chs } 698 1.27 chs #endif /* __FreeBSD__ */ 699 1.27 chs 700 1.27 chs #ifdef __NetBSD__ 701 1.27 chs 702 1.27 chs caddr_t 703 1.27 chs zfs_map_page(page_t *pp, enum seg_rw rw) 704 1.27 chs { 705 1.27 chs vaddr_t va; 706 1.27 chs int flags; 707 1.27 chs 708 1.27 chs #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS 709 1.27 chs if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va)) 710 1.27 chs return (caddr_t)va; 711 1.27 chs #endif 712 1.27 chs 713 1.27 chs flags = UVMPAGER_MAPIN_WAITOK | 714 1.27 chs (rw == S_READ ? UVMPAGER_MAPIN_WRITE : UVMPAGER_MAPIN_READ); 715 1.27 chs va = uvm_pagermapin(&pp, 1, flags); 716 1.27 chs return (caddr_t)va; 717 1.27 chs } 718 1.27 chs 719 1.27 chs void 720 1.27 chs zfs_unmap_page(page_t *pp, caddr_t addr) 721 1.27 chs { 722 1.27 chs 723 1.27 chs #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS 724 1.27 chs vaddr_t va; 725 1.27 chs 726 1.27 chs if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va)) 727 1.27 chs return; 728 1.27 chs #endif 729 1.27 chs uvm_pagermapout((vaddr_t)addr, 1); 730 1.27 chs } 731 1.27 chs 732 1.27 chs static int 733 1.27 chs mappedread(vnode_t *vp, int nbytes, uio_t *uio) 734 1.27 chs { 735 1.27 chs znode_t *zp = VTOZ(vp); 736 1.27 chs struct uvm_object *uobj = &vp->v_uobj; 737 1.62 ad krwlock_t *rw = uobj->vmobjlock; 738 1.27 chs int64_t start; 739 1.27 chs caddr_t va; 740 1.27 chs size_t len = nbytes; 741 1.27 chs int off; 742 1.27 chs int error = 0; 743 1.27 chs int npages, found; 744 1.83 yamt void *buf = NULL; 745 1.27 chs 746 1.27 chs start = uio->uio_loffset; 747 1.27 chs off = start & PAGEOFFSET; 748 1.27 chs 749 1.1 haad for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 750 1.27 chs page_t *pp; 751 1.1 haad uint64_t bytes = MIN(PAGESIZE - off, len); 752 1.83 yamt retry: 753 1.27 chs pp = NULL; 754 1.27 chs npages = 1; 755 1.62 ad 
rw_enter(rw, RW_WRITER); 756 1.55 ad found = uvn_findpages(uobj, start, &npages, &pp, NULL, 757 1.55 ad UFP_NOALLOC); 758 1.62 ad rw_exit(rw); 759 1.27 chs 760 1.27 chs if (found) { 761 1.83 yamt if (buf != NULL) { 762 1.83 yamt va = zfs_map_page(pp, S_READ); 763 1.83 yamt memcpy(buf, va + off, bytes); 764 1.83 yamt zfs_unmap_page(pp, va); 765 1.83 yamt } 766 1.68 ad rw_enter(rw, RW_WRITER); 767 1.68 ad uvm_page_unbusy(&pp, 1); 768 1.68 ad rw_exit(rw); 769 1.83 yamt if (buf == NULL) { 770 1.83 yamt buf = kmem_alloc(PAGESIZE, KM_SLEEP); 771 1.83 yamt goto retry; 772 1.83 yamt } 773 1.83 yamt error = uiomove(buf, bytes, UIO_READ, uio); 774 1.1 haad } else { 775 1.27 chs error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 776 1.27 chs uio, bytes); 777 1.1 haad } 778 1.27 chs 779 1.1 haad len -= bytes; 780 1.1 haad off = 0; 781 1.1 haad if (error) 782 1.1 haad break; 783 1.1 haad } 784 1.83 yamt if (buf != NULL) { 785 1.83 yamt kmem_free(buf, PAGESIZE); 786 1.83 yamt } 787 1.1 haad return (error); 788 1.1 haad } 789 1.27 chs 790 1.27 chs static void 791 1.27 chs update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 792 1.27 chs int segflg, dmu_tx_t *tx) 793 1.27 chs { 794 1.27 chs struct uvm_object *uobj = &vp->v_uobj; 795 1.62 ad krwlock_t *rw = uobj->vmobjlock; 796 1.27 chs caddr_t va; 797 1.56 ad int off, status; 798 1.27 chs 799 1.27 chs ASSERT(vp->v_mount != NULL); 800 1.27 chs 801 1.62 ad rw_enter(rw, RW_WRITER); 802 1.27 chs 803 1.27 chs off = start & PAGEOFFSET; 804 1.27 chs for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 805 1.27 chs page_t *pp; 806 1.27 chs int nbytes = MIN(PAGESIZE - off, len); 807 1.27 chs int npages, found; 808 1.27 chs 809 1.27 chs pp = NULL; 810 1.27 chs npages = 1; 811 1.55 ad found = uvn_findpages(uobj, start, &npages, &pp, NULL, 812 1.55 ad UFP_NOALLOC); 813 1.27 chs if (found) { 814 1.84 yamt if (nbytes == PAGESIZE) { 815 1.84 yamt /* 816 1.84 yamt * We're about to zap the page's contents 817 1.84 yamt * 
and don't care about any existing 818 1.84 yamt * modifications. We must keep track of 819 1.84 yamt * any new modifications past this point. 820 1.84 yamt * Clear the modified bit in the pmap, and 821 1.84 yamt * if the page is marked dirty revert to 822 1.84 yamt * tracking the modified bit. 823 1.84 yamt */ 824 1.84 yamt switch (uvm_pagegetdirty(pp)) { 825 1.84 yamt case UVM_PAGE_STATUS_DIRTY: 826 1.84 yamt /* Does pmap_clear_modify(). */ 827 1.84 yamt uvm_pagemarkdirty(pp, UVM_PAGE_STATUS_UNKNOWN); 828 1.84 yamt break; 829 1.84 yamt case UVM_PAGE_STATUS_UNKNOWN: 830 1.84 yamt pmap_clear_modify(pp); 831 1.84 yamt break; 832 1.84 yamt case UVM_PAGE_STATUS_CLEAN: 833 1.84 yamt /* Nothing to do. */ 834 1.84 yamt break; 835 1.84 yamt } 836 1.56 ad } 837 1.62 ad rw_exit(rw); 838 1.27 chs 839 1.27 chs va = zfs_map_page(pp, S_WRITE); 840 1.27 chs (void) dmu_read(os, oid, start + off, nbytes, 841 1.27 chs va + off, DMU_READ_PREFETCH); 842 1.27 chs zfs_unmap_page(pp, va); 843 1.27 chs 844 1.62 ad rw_enter(rw, RW_WRITER); 845 1.27 chs uvm_page_unbusy(&pp, 1); 846 1.27 chs } 847 1.27 chs len -= nbytes; 848 1.27 chs off = 0; 849 1.27 chs } 850 1.62 ad rw_exit(rw); 851 1.27 chs } 852 1.27 chs #endif /* __NetBSD__ */ 853 1.27 chs 854 1.1 haad offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 855 1.1 haad 856 1.1 haad /* 857 1.1 haad * Read bytes from specified file into supplied buffer. 858 1.1 haad * 859 1.1 haad * IN: vp - vnode of file to be read from. 860 1.1 haad * uio - structure supplying read location, range info, 861 1.1 haad * and return buffer. 862 1.1 haad * ioflag - SYNC flags; used to provide FRSYNC semantics. 863 1.1 haad * cr - credentials of caller. 864 1.1 haad * ct - caller context 865 1.1 haad * 866 1.1 haad * OUT: uio - updated offset and range, buffer filled. 867 1.1 haad * 868 1.27 chs * RETURN: 0 on success, error code on failure. 
869 1.1 haad * 870 1.1 haad * Side Effects: 871 1.1 haad * vp - atime updated if byte count > 0 872 1.1 haad */ 873 1.1 haad /* ARGSUSED */ 874 1.1 haad static int 875 1.1 haad zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 876 1.1 haad { 877 1.1 haad znode_t *zp = VTOZ(vp); 878 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 879 1.1 haad ssize_t n, nbytes; 880 1.27 chs int error = 0; 881 1.1 haad rl_t *rl; 882 1.4 haad xuio_t *xuio = NULL; 883 1.1 haad 884 1.1 haad ZFS_ENTER(zfsvfs); 885 1.1 haad ZFS_VERIFY_ZP(zp); 886 1.1 haad 887 1.27 chs if (zp->z_pflags & ZFS_AV_QUARANTINED) { 888 1.1 haad ZFS_EXIT(zfsvfs); 889 1.27 chs return (SET_ERROR(EACCES)); 890 1.1 haad } 891 1.1 haad 892 1.1 haad /* 893 1.1 haad * Validate file offset 894 1.1 haad */ 895 1.1 haad if (uio->uio_loffset < (offset_t)0) { 896 1.1 haad ZFS_EXIT(zfsvfs); 897 1.27 chs return (SET_ERROR(EINVAL)); 898 1.1 haad } 899 1.1 haad 900 1.1 haad /* 901 1.1 haad * Fasttrack empty reads 902 1.1 haad */ 903 1.1 haad if (uio->uio_resid == 0) { 904 1.1 haad ZFS_EXIT(zfsvfs); 905 1.1 haad return (0); 906 1.1 haad } 907 1.1 haad 908 1.1 haad /* 909 1.1 haad * Check for mandatory locks 910 1.1 haad */ 911 1.27 chs if (MANDMODE(zp->z_mode)) { 912 1.1 haad if (error = chklock(vp, FREAD, 913 1.1 haad uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 914 1.1 haad ZFS_EXIT(zfsvfs); 915 1.1 haad return (error); 916 1.1 haad } 917 1.1 haad } 918 1.1 haad 919 1.1 haad /* 920 1.1 haad * If we're in FRSYNC mode, sync out this znode before reading it. 921 1.1 haad */ 922 1.27 chs if (zfsvfs->z_log && 923 1.27 chs (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 924 1.27 chs zil_commit(zfsvfs->z_log, zp->z_id); 925 1.1 haad 926 1.1 haad /* 927 1.1 haad * Lock the range against changes. 
928 1.1 haad */ 929 1.1 haad rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 930 1.1 haad 931 1.1 haad /* 932 1.1 haad * If we are reading past end-of-file we can skip 933 1.1 haad * to the end; but we might still need to set atime. 934 1.1 haad */ 935 1.27 chs if (uio->uio_loffset >= zp->z_size) { 936 1.1 haad error = 0; 937 1.1 haad goto out; 938 1.1 haad } 939 1.1 haad 940 1.27 chs ASSERT(uio->uio_loffset < zp->z_size); 941 1.27 chs n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 942 1.27 chs 943 1.27 chs #ifdef illumos 944 1.4 haad if ((uio->uio_extflg == UIO_XUIO) && 945 1.4 haad (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 946 1.4 haad int nblk; 947 1.4 haad int blksz = zp->z_blksz; 948 1.4 haad uint64_t offset = uio->uio_loffset; 949 1.4 haad 950 1.4 haad xuio = (xuio_t *)uio; 951 1.4 haad if ((ISP2(blksz))) { 952 1.4 haad nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 953 1.4 haad blksz)) / blksz; 954 1.4 haad } else { 955 1.4 haad ASSERT(offset + n <= blksz); 956 1.4 haad nblk = 1; 957 1.4 haad } 958 1.4 haad (void) dmu_xuio_init(xuio, nblk); 959 1.1 haad 960 1.4 haad if (vn_has_cached_data(vp)) { 961 1.4 haad /* 962 1.4 haad * For simplicity, we always allocate a full buffer 963 1.4 haad * even if we only expect to read a portion of a block. 
964 1.4 haad */ 965 1.4 haad while (--nblk >= 0) { 966 1.4 haad (void) dmu_xuio_add(xuio, 967 1.27 chs dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 968 1.27 chs blksz), 0, blksz); 969 1.4 haad } 970 1.4 haad } 971 1.4 haad } 972 1.27 chs #endif /* illumos */ 973 1.27 chs 974 1.1 haad while (n > 0) { 975 1.1 haad nbytes = MIN(n, zfs_read_chunk_size - 976 1.1 haad P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 977 1.1 haad 978 1.27 chs #ifdef __FreeBSD__ 979 1.27 chs if (uio->uio_segflg == UIO_NOCOPY) 980 1.27 chs error = mappedread_sf(vp, nbytes, uio); 981 1.27 chs else 982 1.27 chs #endif /* __FreeBSD__ */ 983 1.27 chs if (vn_has_cached_data(vp)) { 984 1.27 chs error = mappedread(vp, nbytes, uio); 985 1.27 chs } else { 986 1.27 chs error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 987 1.27 chs uio, nbytes); 988 1.27 chs } 989 1.1 haad if (error) { 990 1.1 haad /* convert checksum errors into IO errors */ 991 1.1 haad if (error == ECKSUM) 992 1.27 chs error = SET_ERROR(EIO); 993 1.1 haad break; 994 1.1 haad } 995 1.1 haad 996 1.1 haad n -= nbytes; 997 1.1 haad } 998 1.1 haad out: 999 1.1 haad zfs_range_unlock(rl); 1000 1.1 haad 1001 1.1 haad ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 1002 1.1 haad ZFS_EXIT(zfsvfs); 1003 1.1 haad return (error); 1004 1.1 haad } 1005 1.1 haad 1006 1.1 haad /* 1007 1.1 haad * Write the bytes to a file. 1008 1.1 haad * 1009 1.1 haad * IN: vp - vnode of file to be written to. 1010 1.1 haad * uio - structure supplying write location, range info, 1011 1.1 haad * and data buffer. 1012 1.27 chs * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 1013 1.27 chs * set if in append mode. 1014 1.1 haad * cr - credentials of caller. 1015 1.1 haad * ct - caller context (NFS/CIFS fem monitor only) 1016 1.1 haad * 1017 1.1 haad * OUT: uio - updated offset and range. 1018 1.1 haad * 1019 1.27 chs * RETURN: 0 on success, error code on failure. 
1020 1.1 haad * 1021 1.1 haad * Timestamps: 1022 1.1 haad * vp - ctime|mtime updated if byte count > 0 1023 1.1 haad */ 1024 1.27 chs 1025 1.1 haad /* ARGSUSED */ 1026 1.1 haad static int 1027 1.1 haad zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 1028 1.1 haad { 1029 1.1 haad znode_t *zp = VTOZ(vp); 1030 1.2 haad rlim64_t limit = MAXOFFSET_T; 1031 1.1 haad ssize_t start_resid = uio->uio_resid; 1032 1.1 haad ssize_t tx_bytes; 1033 1.1 haad uint64_t end_size; 1034 1.1 haad dmu_tx_t *tx; 1035 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1036 1.1 haad zilog_t *zilog; 1037 1.1 haad offset_t woff; 1038 1.1 haad ssize_t n, nbytes; 1039 1.1 haad rl_t *rl; 1040 1.1 haad int max_blksz = zfsvfs->z_max_blksz; 1041 1.27 chs int error = 0; 1042 1.4 haad arc_buf_t *abuf; 1043 1.27 chs iovec_t *aiov = NULL; 1044 1.4 haad xuio_t *xuio = NULL; 1045 1.4 haad int i_iov = 0; 1046 1.4 haad int iovcnt = uio->uio_iovcnt; 1047 1.4 haad iovec_t *iovp = uio->uio_iov; 1048 1.4 haad int write_eof; 1049 1.27 chs int count = 0; 1050 1.27 chs sa_bulk_attr_t bulk[4]; 1051 1.27 chs uint64_t mtime[2], ctime[2]; 1052 1.27 chs int segflg; 1053 1.99 yamt boolean_t commit; 1054 1.1 haad 1055 1.27 chs #ifdef __NetBSD__ 1056 1.27 chs segflg = VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ? 
1057 1.27 chs UIO_SYSSPACE : UIO_USERSPACE; 1058 1.27 chs #else 1059 1.27 chs segflg = uio->uio_segflg; 1060 1.27 chs #endif 1061 1.2 haad 1062 1.1 haad /* 1063 1.1 haad * Fasttrack empty write 1064 1.1 haad */ 1065 1.1 haad n = start_resid; 1066 1.1 haad if (n == 0) 1067 1.1 haad return (0); 1068 1.1 haad 1069 1.1 haad if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 1070 1.1 haad limit = MAXOFFSET_T; 1071 1.1 haad 1072 1.1 haad ZFS_ENTER(zfsvfs); 1073 1.1 haad ZFS_VERIFY_ZP(zp); 1074 1.1 haad 1075 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 1076 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 1077 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 1078 1.27 chs &zp->z_size, 8); 1079 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 1080 1.27 chs &zp->z_pflags, 8); 1081 1.27 chs 1082 1.1 haad /* 1083 1.27 chs * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 1084 1.27 chs * callers might not be able to detect properly that we are read-only, 1085 1.27 chs * so check it explicitly here. 1086 1.1 haad */ 1087 1.27 chs if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 1088 1.1 haad ZFS_EXIT(zfsvfs); 1089 1.27 chs return (SET_ERROR(EROFS)); 1090 1.1 haad } 1091 1.27 chs 1092 1.27 chs /* 1093 1.27 chs * If immutable or not appending then return EPERM 1094 1.27 chs */ 1095 1.27 chs if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 1096 1.27 chs ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 1097 1.27 chs (uio->uio_loffset < zp->z_size))) { 1098 1.27 chs ZFS_EXIT(zfsvfs); 1099 1.27 chs return (SET_ERROR(EPERM)); 1100 1.27 chs } 1101 1.27 chs 1102 1.27 chs zilog = zfsvfs->z_log; 1103 1.1 haad 1104 1.1 haad /* 1105 1.4 haad * Validate file offset 1106 1.4 haad */ 1107 1.27 chs woff = ioflag & FAPPEND ? 
zp->z_size : uio->uio_loffset; 1108 1.4 haad if (woff < 0) { 1109 1.4 haad ZFS_EXIT(zfsvfs); 1110 1.27 chs return (SET_ERROR(EINVAL)); 1111 1.4 haad } 1112 1.4 haad 1113 1.4 haad /* 1114 1.4 haad * Check for mandatory locks before calling zfs_range_lock() 1115 1.4 haad * in order to prevent a deadlock with locks set via fcntl(). 1116 1.4 haad */ 1117 1.27 chs if (MANDMODE((mode_t)zp->z_mode) && 1118 1.4 haad (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 1119 1.4 haad ZFS_EXIT(zfsvfs); 1120 1.4 haad return (error); 1121 1.4 haad } 1122 1.4 haad 1123 1.27 chs #ifdef illumos 1124 1.4 haad /* 1125 1.1 haad * Pre-fault the pages to ensure slow (eg NFS) pages 1126 1.1 haad * don't hold up txg. 1127 1.4 haad * Skip this if uio contains loaned arc_buf. 1128 1.1 haad */ 1129 1.27 chs if ((uio->uio_extflg == UIO_XUIO) && 1130 1.27 chs (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 1131 1.27 chs xuio = (xuio_t *)uio; 1132 1.27 chs else 1133 1.27 chs uio_prefaultpages(MIN(n, max_blksz), uio); 1134 1.27 chs #endif 1135 1.1 haad 1136 1.1 haad /* 1137 1.1 haad * If in append mode, set the io offset pointer to eof. 1138 1.1 haad */ 1139 1.27 chs if (ioflag & FAPPEND) { 1140 1.1 haad /* 1141 1.4 haad * Obtain an appending range lock to guarantee file append 1142 1.4 haad * semantics. We reset the write offset once we have the lock. 1143 1.1 haad */ 1144 1.1 haad rl = zfs_range_lock(zp, 0, n, RL_APPEND); 1145 1.4 haad woff = rl->r_off; 1146 1.1 haad if (rl->r_len == UINT64_MAX) { 1147 1.4 haad /* 1148 1.4 haad * We overlocked the file because this write will cause 1149 1.4 haad * the file block size to increase. 1150 1.4 haad * Note that zp_size cannot change with this lock held. 
1151 1.4 haad */ 1152 1.27 chs woff = zp->z_size; 1153 1.1 haad } 1154 1.4 haad uio->uio_loffset = woff; 1155 1.1 haad } else { 1156 1.1 haad /* 1157 1.4 haad * Note that if the file block size will change as a result of 1158 1.4 haad * this write, then this range lock will lock the entire file 1159 1.4 haad * so that we can re-write the block safely. 1160 1.1 haad */ 1161 1.1 haad rl = zfs_range_lock(zp, woff, n, RL_WRITER); 1162 1.1 haad } 1163 1.1 haad 1164 1.27 chs #ifdef illumos 1165 1.1 haad if (woff >= limit) { 1166 1.1 haad zfs_range_unlock(rl); 1167 1.1 haad ZFS_EXIT(zfsvfs); 1168 1.27 chs return (SET_ERROR(EFBIG)); 1169 1.27 chs } 1170 1.27 chs 1171 1.27 chs #endif 1172 1.27 chs #ifdef __FreeBSD__ 1173 1.27 chs if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 1174 1.27 chs zfs_range_unlock(rl); 1175 1.27 chs ZFS_EXIT(zfsvfs); 1176 1.27 chs return (SET_ERROR(EFBIG)); 1177 1.1 haad } 1178 1.27 chs #endif 1179 1.27 chs #ifdef __NetBSD__ 1180 1.27 chs /* XXXNETBSD we might need vn_rlimit_fsize() too here eventually */ 1181 1.27 chs #endif 1182 1.1 haad 1183 1.1 haad if ((woff + n) > limit || woff > (limit - n)) 1184 1.1 haad n = limit - woff; 1185 1.1 haad 1186 1.4 haad /* Will this write extend the file length? */ 1187 1.27 chs write_eof = (woff + n > zp->z_size); 1188 1.4 haad 1189 1.27 chs end_size = MAX(zp->z_size, woff + n); 1190 1.1 haad 1191 1.99 yamt commit = ((ioflag & (FSYNC | FDSYNC)) != 0 || 1192 1.99 yamt zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS); 1193 1.99 yamt 1194 1.1 haad /* 1195 1.1 haad * Write the file in reasonable size chunks. Each chunk is written 1196 1.1 haad * in a separate transaction; this keeps the intent log records small 1197 1.1 haad * and allows us to do more fine-grained space accounting. 
1198 1.1 haad */ 1199 1.1 haad while (n > 0) { 1200 1.4 haad abuf = NULL; 1201 1.4 haad woff = uio->uio_loffset; 1202 1.27 chs if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1203 1.27 chs zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1204 1.4 haad if (abuf != NULL) 1205 1.4 haad dmu_return_arcbuf(abuf); 1206 1.27 chs error = SET_ERROR(EDQUOT); 1207 1.4 haad break; 1208 1.4 haad } 1209 1.4 haad 1210 1.4 haad if (xuio && abuf == NULL) { 1211 1.4 haad ASSERT(i_iov < iovcnt); 1212 1.4 haad aiov = &iovp[i_iov]; 1213 1.4 haad abuf = dmu_xuio_arcbuf(xuio, i_iov); 1214 1.4 haad dmu_xuio_clear(xuio, i_iov); 1215 1.4 haad DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1216 1.4 haad iovec_t *, aiov, arc_buf_t *, abuf); 1217 1.4 haad ASSERT((aiov->iov_base == abuf->b_data) || 1218 1.4 haad ((char *)aiov->iov_base - (char *)abuf->b_data + 1219 1.4 haad aiov->iov_len == arc_buf_size(abuf))); 1220 1.4 haad i_iov++; 1221 1.4 haad } else if (abuf == NULL && n >= max_blksz && 1222 1.27 chs woff >= zp->z_size && 1223 1.4 haad P2PHASE(woff, max_blksz) == 0 && 1224 1.4 haad zp->z_blksz == max_blksz) { 1225 1.4 haad /* 1226 1.4 haad * This write covers a full block. "Borrow" a buffer 1227 1.4 haad * from the dmu so that we can fill it before we enter 1228 1.4 haad * a transaction. This avoids the possibility of 1229 1.4 haad * holding up the transaction if the data copy hangs 1230 1.4 haad * up on a pagefault (e.g., from an NFS server mapping). 
1231 1.4 haad */ 1232 1.36 hannken #if defined(illumos) || defined(__NetBSD__) 1233 1.4 haad size_t cbytes; 1234 1.27 chs #endif 1235 1.4 haad 1236 1.27 chs abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1237 1.27 chs max_blksz); 1238 1.4 haad ASSERT(abuf != NULL); 1239 1.4 haad ASSERT(arc_buf_size(abuf) == max_blksz); 1240 1.36 hannken #if defined(illumos) || defined(__NetBSD__) 1241 1.4 haad if (error = uiocopy(abuf->b_data, max_blksz, 1242 1.4 haad UIO_WRITE, uio, &cbytes)) { 1243 1.4 haad dmu_return_arcbuf(abuf); 1244 1.4 haad break; 1245 1.4 haad } 1246 1.4 haad ASSERT(cbytes == max_blksz); 1247 1.27 chs #endif 1248 1.27 chs #ifdef __FreeBSD__ 1249 1.27 chs ssize_t resid = uio->uio_resid; 1250 1.27 chs 1251 1.27 chs error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); 1252 1.27 chs if (error != 0) { 1253 1.27 chs uio->uio_offset -= resid - uio->uio_resid; 1254 1.27 chs uio->uio_resid = resid; 1255 1.27 chs dmu_return_arcbuf(abuf); 1256 1.27 chs break; 1257 1.27 chs } 1258 1.27 chs #endif 1259 1.4 haad } 1260 1.4 haad 1261 1.1 haad /* 1262 1.1 haad * Start a transaction. 1263 1.1 haad */ 1264 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 1265 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1266 1.1 haad dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1267 1.27 chs zfs_sa_upgrade_txholds(tx, zp); 1268 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 1269 1.1 haad if (error) { 1270 1.1 haad dmu_tx_abort(tx); 1271 1.4 haad if (abuf != NULL) 1272 1.4 haad dmu_return_arcbuf(abuf); 1273 1.1 haad break; 1274 1.1 haad } 1275 1.1 haad 1276 1.1 haad /* 1277 1.1 haad * If zfs_range_lock() over-locked we grow the blocksize 1278 1.1 haad * and then reduce the lock range. This will only happen 1279 1.1 haad * on the first iteration since zfs_range_reduce() will 1280 1.1 haad * shrink down r_len to the appropriate size. 
1281 1.1 haad */ 1282 1.1 haad if (rl->r_len == UINT64_MAX) { 1283 1.1 haad uint64_t new_blksz; 1284 1.1 haad 1285 1.1 haad if (zp->z_blksz > max_blksz) { 1286 1.27 chs /* 1287 1.27 chs * File's blocksize is already larger than the 1288 1.27 chs * "recordsize" property. Only let it grow to 1289 1.27 chs * the next power of 2. 1290 1.27 chs */ 1291 1.1 haad ASSERT(!ISP2(zp->z_blksz)); 1292 1.27 chs new_blksz = MIN(end_size, 1293 1.27 chs 1 << highbit64(zp->z_blksz)); 1294 1.1 haad } else { 1295 1.1 haad new_blksz = MIN(end_size, max_blksz); 1296 1.1 haad } 1297 1.1 haad zfs_grow_blocksize(zp, new_blksz, tx); 1298 1.1 haad zfs_range_reduce(rl, woff, n); 1299 1.1 haad } 1300 1.1 haad 1301 1.1 haad /* 1302 1.1 haad * XXX - should we really limit each write to z_max_blksz? 1303 1.1 haad * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1304 1.1 haad */ 1305 1.1 haad nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1306 1.2 haad 1307 1.27 chs if (woff + nbytes > zp->z_size) 1308 1.27 chs vnode_pager_setsize(vp, woff + nbytes); 1309 1.27 chs 1310 1.4 haad if (abuf == NULL) { 1311 1.4 haad tx_bytes = uio->uio_resid; 1312 1.27 chs error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1313 1.27 chs uio, nbytes, tx); 1314 1.4 haad tx_bytes -= uio->uio_resid; 1315 1.1 haad } else { 1316 1.4 haad tx_bytes = nbytes; 1317 1.4 haad ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1318 1.4 haad /* 1319 1.4 haad * If this is not a full block write, but we are 1320 1.4 haad * extending the file past EOF and this data starts 1321 1.4 haad * block-aligned, use assign_arcbuf(). Otherwise, 1322 1.4 haad * write via dmu_write(). 
1323 1.4 haad */ 1324 1.4 haad if (tx_bytes < max_blksz && (!write_eof || 1325 1.4 haad aiov->iov_base != abuf->b_data)) { 1326 1.4 haad ASSERT(xuio); 1327 1.4 haad dmu_write(zfsvfs->z_os, zp->z_id, woff, 1328 1.4 haad aiov->iov_len, aiov->iov_base, tx); 1329 1.4 haad dmu_return_arcbuf(abuf); 1330 1.4 haad xuio_stat_wbuf_copied(); 1331 1.4 haad } else { 1332 1.4 haad ASSERT(xuio || tx_bytes == max_blksz); 1333 1.27 chs dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1334 1.27 chs woff, abuf, tx); 1335 1.4 haad } 1336 1.36 hannken #if defined(illumos) || defined(__NetBSD__) 1337 1.4 haad ASSERT(tx_bytes <= uio->uio_resid); 1338 1.4 haad uioskip(uio, tx_bytes); 1339 1.27 chs #endif 1340 1.1 haad } 1341 1.4 haad if (tx_bytes && vn_has_cached_data(vp)) { 1342 1.27 chs update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1343 1.27 chs zp->z_id, segflg, tx); 1344 1.4 haad } 1345 1.27 chs 1346 1.1 haad /* 1347 1.1 haad * If we made no progress, we're done. If we made even 1348 1.1 haad * partial progress, update the znode and ZIL accordingly. 1349 1.1 haad */ 1350 1.1 haad if (tx_bytes == 0) { 1351 1.27 chs (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1352 1.27 chs (void *)&zp->z_size, sizeof (uint64_t), tx); 1353 1.1 haad dmu_tx_commit(tx); 1354 1.1 haad ASSERT(error != 0); 1355 1.1 haad break; 1356 1.1 haad } 1357 1.1 haad 1358 1.1 haad /* 1359 1.1 haad * Clear Set-UID/Set-GID bits on successful write if not 1360 1.1 haad * privileged and at least one of the excute bits is set. 1361 1.1 haad * 1362 1.1 haad * It would be nice to to this after all writes have 1363 1.1 haad * been done, but that would still expose the ISUID/ISGID 1364 1.1 haad * to another app after the partial write is committed. 1365 1.1 haad * 1366 1.1 haad * Note: we don't call zfs_fuid_map_id() here because 1367 1.1 haad * user 0 is not an ephemeral uid. 
1368 1.1 haad */ 1369 1.1 haad mutex_enter(&zp->z_acl_lock); 1370 1.27 chs if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1371 1.1 haad (S_IXUSR >> 6))) != 0 && 1372 1.27 chs (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1373 1.27 chs secpolicy_vnode_setid_retain(vp, cr, 1374 1.27 chs (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1375 1.27 chs uint64_t newmode; 1376 1.27 chs zp->z_mode &= ~(S_ISUID | S_ISGID); 1377 1.27 chs newmode = zp->z_mode; 1378 1.27 chs (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1379 1.27 chs (void *)&newmode, sizeof (uint64_t), tx); 1380 1.70 hannken #ifdef __NetBSD__ 1381 1.86 yamt if (zfsvfs->z_use_namecache) 1382 1.86 yamt cache_enter_id(vp, zp->z_mode, zp->z_uid, 1383 1.86 yamt zp->z_gid, true); 1384 1.70 hannken #endif 1385 1.1 haad } 1386 1.1 haad mutex_exit(&zp->z_acl_lock); 1387 1.1 haad 1388 1.27 chs zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1389 1.27 chs B_TRUE); 1390 1.1 haad 1391 1.1 haad /* 1392 1.1 haad * Update the file size (zp_size) if it has changed; 1393 1.1 haad * account for possible concurrent updates. 1394 1.1 haad */ 1395 1.27 chs while ((end_size = zp->z_size) < uio->uio_loffset) { 1396 1.27 chs (void) atomic_cas_64(&zp->z_size, end_size, 1397 1.1 haad uio->uio_loffset); 1398 1.27 chs #ifdef illumos 1399 1.27 chs ASSERT(error == 0); 1400 1.27 chs #else 1401 1.27 chs ASSERT(error == 0 || error == EFAULT); 1402 1.27 chs #endif 1403 1.27 chs } 1404 1.27 chs /* 1405 1.27 chs * If we are replaying and eof is non zero then force 1406 1.27 chs * the file size to the specified eof. Note, there's no 1407 1.27 chs * concurrency during replay. 
1408 1.27 chs */ 1409 1.27 chs if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1410 1.27 chs zp->z_size = zfsvfs->z_replay_eof; 1411 1.27 chs 1412 1.27 chs if (error == 0) 1413 1.27 chs error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1414 1.27 chs else 1415 1.27 chs (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1416 1.27 chs 1417 1.99 yamt zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit); 1418 1.1 haad dmu_tx_commit(tx); 1419 1.1 haad 1420 1.1 haad if (error != 0) 1421 1.1 haad break; 1422 1.1 haad ASSERT(tx_bytes == nbytes); 1423 1.1 haad n -= nbytes; 1424 1.27 chs 1425 1.27 chs #ifdef illumos 1426 1.27 chs if (!xuio && n > 0) 1427 1.27 chs uio_prefaultpages(MIN(n, max_blksz), uio); 1428 1.27 chs #endif 1429 1.1 haad } 1430 1.1 haad 1431 1.1 haad zfs_range_unlock(rl); 1432 1.1 haad 1433 1.1 haad /* 1434 1.1 haad * If we're in replay mode, or we made no progress, return error. 1435 1.1 haad * Otherwise, it's at least a partial write, so it's successful. 1436 1.1 haad */ 1437 1.4 haad if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1438 1.1 haad ZFS_EXIT(zfsvfs); 1439 1.1 haad return (error); 1440 1.1 haad } 1441 1.1 haad 1442 1.27 chs #ifdef __FreeBSD__ 1443 1.27 chs /* 1444 1.27 chs * EFAULT means that at least one page of the source buffer was not 1445 1.27 chs * available. VFS will re-try remaining I/O upon this error. 
1446 1.27 chs */ 1447 1.27 chs if (error == EFAULT) { 1448 1.27 chs ZFS_EXIT(zfsvfs); 1449 1.27 chs return (error); 1450 1.27 chs } 1451 1.27 chs #endif 1452 1.27 chs 1453 1.99 yamt if (commit) 1454 1.27 chs zil_commit(zilog, zp->z_id); 1455 1.1 haad 1456 1.1 haad ZFS_EXIT(zfsvfs); 1457 1.1 haad return (0); 1458 1.1 haad } 1459 1.1 haad 1460 1.1 haad void 1461 1.4 haad zfs_get_done(zgd_t *zgd, int error) 1462 1.1 haad { 1463 1.4 haad znode_t *zp = zgd->zgd_private; 1464 1.4 haad objset_t *os = zp->z_zfsvfs->z_os; 1465 1.4 haad 1466 1.4 haad if (zgd->zgd_db) 1467 1.4 haad dmu_buf_rele(zgd->zgd_db, zgd); 1468 1.4 haad 1469 1.4 haad zfs_range_unlock(zgd->zgd_rl); 1470 1.1 haad 1471 1.2 haad /* 1472 1.2 haad * Release the vnode asynchronously as we currently have the 1473 1.2 haad * txg stopped from syncing. 1474 1.2 haad */ 1475 1.92 yamt VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1476 1.4 haad 1477 1.4 haad if (error == 0 && zgd->zgd_bp) 1478 1.4 haad zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1479 1.4 haad 1480 1.1 haad kmem_free(zgd, sizeof (zgd_t)); 1481 1.1 haad } 1482 1.1 haad 1483 1.4 haad #ifdef DEBUG 1484 1.4 haad static int zil_fault_io = 0; 1485 1.4 haad #endif 1486 1.4 haad 1487 1.1 haad /* 1488 1.1 haad * Get data to generate a TX_WRITE intent log record. 
1489 1.1 haad */ 1490 1.1 haad int 1491 1.1 haad zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1492 1.1 haad { 1493 1.1 haad zfsvfs_t *zfsvfs = arg; 1494 1.1 haad objset_t *os = zfsvfs->z_os; 1495 1.1 haad znode_t *zp; 1496 1.4 haad uint64_t object = lr->lr_foid; 1497 1.4 haad uint64_t offset = lr->lr_offset; 1498 1.4 haad uint64_t size = lr->lr_length; 1499 1.4 haad blkptr_t *bp = &lr->lr_blkptr; 1500 1.1 haad dmu_buf_t *db; 1501 1.1 haad zgd_t *zgd; 1502 1.1 haad int error = 0; 1503 1.1 haad 1504 1.4 haad ASSERT(zio != NULL); 1505 1.4 haad ASSERT(size != 0); 1506 1.1 haad 1507 1.1 haad /* 1508 1.1 haad * Nothing to do if the file has been removed 1509 1.1 haad */ 1510 1.92 yamt if (zfs_zget(zfsvfs, object, &zp) != 0) 1511 1.27 chs return (SET_ERROR(ENOENT)); 1512 1.1 haad if (zp->z_unlinked) { 1513 1.2 haad /* 1514 1.2 haad * Release the vnode asynchronously as we currently have the 1515 1.2 haad * txg stopped from syncing. 1516 1.2 haad */ 1517 1.92 yamt VN_RELE_ASYNC(ZTOV(zp), 1518 1.4 haad dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1519 1.27 chs return (SET_ERROR(ENOENT)); 1520 1.1 haad } 1521 1.1 haad 1522 1.4 haad zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1523 1.4 haad zgd->zgd_zilog = zfsvfs->z_log; 1524 1.4 haad zgd->zgd_private = zp; 1525 1.4 haad 1526 1.1 haad /* 1527 1.1 haad * Write records come in two flavors: immediate and indirect. 1528 1.1 haad * For small writes it's cheaper to store the data with the 1529 1.1 haad * log record (immediate); for large writes it's cheaper to 1530 1.1 haad * sync the data and get a pointer to it (indirect) so that 1531 1.1 haad * we don't have to write the data twice. 
1532 1.1 haad */ 1533 1.1 haad if (buf != NULL) { /* immediate write */ 1534 1.4 haad zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1535 1.1 haad /* test for truncation needs to be done while range locked */ 1536 1.27 chs if (offset >= zp->z_size) { 1537 1.27 chs error = SET_ERROR(ENOENT); 1538 1.4 haad } else { 1539 1.4 haad error = dmu_read(os, object, offset, size, buf, 1540 1.4 haad DMU_READ_NO_PREFETCH); 1541 1.1 haad } 1542 1.4 haad ASSERT(error == 0 || error == ENOENT); 1543 1.1 haad } else { /* indirect write */ 1544 1.1 haad /* 1545 1.1 haad * Have to lock the whole block to ensure when it's 1546 1.1 haad * written out and it's checksum is being calculated 1547 1.1 haad * that no one can change the data. We need to re-check 1548 1.1 haad * blocksize after we get the lock in case it's changed! 1549 1.1 haad */ 1550 1.1 haad for (;;) { 1551 1.4 haad uint64_t blkoff; 1552 1.4 haad size = zp->z_blksz; 1553 1.4 haad blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1554 1.4 haad offset -= blkoff; 1555 1.4 haad zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1556 1.4 haad RL_READER); 1557 1.4 haad if (zp->z_blksz == size) 1558 1.1 haad break; 1559 1.4 haad offset += blkoff; 1560 1.4 haad zfs_range_unlock(zgd->zgd_rl); 1561 1.1 haad } 1562 1.1 haad /* test for truncation needs to be done while range locked */ 1563 1.27 chs if (lr->lr_offset >= zp->z_size) 1564 1.27 chs error = SET_ERROR(ENOENT); 1565 1.4 haad #ifdef DEBUG 1566 1.4 haad if (zil_fault_io) { 1567 1.27 chs error = SET_ERROR(EIO); 1568 1.4 haad zil_fault_io = 0; 1569 1.1 haad } 1570 1.4 haad #endif 1571 1.1 haad if (error == 0) 1572 1.27 chs error = dmu_buf_hold(os, object, offset, zgd, &db, 1573 1.27 chs DMU_READ_NO_PREFETCH); 1574 1.4 haad 1575 1.4 haad if (error == 0) { 1576 1.27 chs blkptr_t *obp = dmu_buf_get_blkptr(db); 1577 1.27 chs if (obp) { 1578 1.27 chs ASSERT(BP_IS_HOLE(bp)); 1579 1.27 chs *bp = *obp; 1580 1.27 chs } 1581 1.27 chs 1582 1.4 haad zgd->zgd_db = db; 1583 1.4 
haad zgd->zgd_bp = bp; 1584 1.4 haad 1585 1.4 haad ASSERT(db->db_offset == offset); 1586 1.4 haad ASSERT(db->db_size == size); 1587 1.4 haad 1588 1.4 haad error = dmu_sync(zio, lr->lr_common.lrc_txg, 1589 1.4 haad zfs_get_done, zgd); 1590 1.4 haad ASSERT(error || lr->lr_length <= zp->z_blksz); 1591 1.4 haad 1592 1.4 haad /* 1593 1.4 haad * On success, we need to wait for the write I/O 1594 1.4 haad * initiated by dmu_sync() to complete before we can 1595 1.4 haad * release this dbuf. We will finish everything up 1596 1.4 haad * in the zfs_get_done() callback. 1597 1.4 haad */ 1598 1.4 haad if (error == 0) 1599 1.4 haad return (0); 1600 1.4 haad 1601 1.4 haad if (error == EALREADY) { 1602 1.4 haad lr->lr_common.lrc_txtype = TX_WRITE2; 1603 1.4 haad error = 0; 1604 1.4 haad } 1605 1.4 haad } 1606 1.1 haad } 1607 1.4 haad 1608 1.4 haad zfs_get_done(zgd, error); 1609 1.4 haad 1610 1.1 haad return (error); 1611 1.1 haad } 1612 1.1 haad 1613 1.1 haad /*ARGSUSED*/ 1614 1.1 haad static int 1615 1.1 haad zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1616 1.1 haad caller_context_t *ct) 1617 1.1 haad { 1618 1.1 haad znode_t *zp = VTOZ(vp); 1619 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1620 1.1 haad int error; 1621 1.1 haad 1622 1.1 haad ZFS_ENTER(zfsvfs); 1623 1.1 haad ZFS_VERIFY_ZP(zp); 1624 1.1 haad 1625 1.1 haad if (flag & V_ACE_MASK) 1626 1.1 haad error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1627 1.1 haad else 1628 1.1 haad error = zfs_zaccess_rwx(zp, mode, flag, cr); 1629 1.1 haad 1630 1.1 haad ZFS_EXIT(zfsvfs); 1631 1.1 haad return (error); 1632 1.1 haad } 1633 1.1 haad 1634 1.27 chs #ifdef __FreeBSD__ 1635 1.27 chs static int 1636 1.27 chs zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1637 1.27 chs { 1638 1.27 chs int error; 1639 1.27 chs 1640 1.27 chs *vpp = arg; 1641 1.27 chs error = vn_lock(*vpp, lkflags); 1642 1.27 chs if (error != 0) 1643 1.27 chs vrele(*vpp); 1644 1.27 chs return (error); 1645 1.27 chs } 1646 1.27 
chs 1647 1.27 chs static int 1648 1.27 chs zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) 1649 1.27 chs { 1650 1.27 chs znode_t *zdp = VTOZ(dvp); 1651 1.27 chs zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1652 1.27 chs int error; 1653 1.27 chs int ltype; 1654 1.27 chs 1655 1.27 chs ASSERT_VOP_LOCKED(dvp, __func__); 1656 1.27 chs #ifdef DIAGNOSTIC 1657 1.27 chs if ((zdp->z_pflags & ZFS_XATTR) == 0) 1658 1.27 chs VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1659 1.27 chs #endif 1660 1.27 chs 1661 1.27 chs if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 1662 1.27 chs ASSERT3P(dvp, ==, vp); 1663 1.27 chs vref(dvp); 1664 1.27 chs ltype = lkflags & LK_TYPE_MASK; 1665 1.27 chs if (ltype != VOP_ISLOCKED(dvp)) { 1666 1.27 chs if (ltype == LK_EXCLUSIVE) 1667 1.27 chs vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1668 1.27 chs else /* if (ltype == LK_SHARED) */ 1669 1.27 chs vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1670 1.27 chs 1671 1.27 chs /* 1672 1.27 chs * Relock for the "." case could leave us with 1673 1.27 chs * reclaimed vnode. 1674 1.27 chs */ 1675 1.27 chs if (dvp->v_iflag & VI_DOOMED) { 1676 1.27 chs vrele(dvp); 1677 1.27 chs return (SET_ERROR(ENOENT)); 1678 1.27 chs } 1679 1.27 chs } 1680 1.27 chs return (0); 1681 1.27 chs } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1682 1.27 chs /* 1683 1.27 chs * Note that in this case, dvp is the child vnode, and we 1684 1.27 chs * are looking up the parent vnode - exactly reverse from 1685 1.27 chs * normal operation. Unlocking dvp requires some rather 1686 1.27 chs * tricky unlock/relock dance to prevent mp from being freed; 1687 1.27 chs * use vn_vget_ino_gen() which takes care of all that. 1688 1.27 chs * 1689 1.27 chs * XXX Note that there is a time window when both vnodes are 1690 1.27 chs * unlocked. It is possible, although highly unlikely, that 1691 1.27 chs * during that window the parent-child relationship between 1692 1.27 chs * the vnodes may change, for example, get reversed. 
1693 1.27 chs * In that case we would have a wrong lock order for the vnodes. 1694 1.27 chs * All other filesystems seem to ignore this problem, so we 1695 1.27 chs * do the same here. 1696 1.27 chs * A potential solution could be implemented as follows: 1697 1.27 chs * - using LK_NOWAIT when locking the second vnode and retrying 1698 1.27 chs * if necessary 1699 1.27 chs * - checking that the parent-child relationship still holds 1700 1.27 chs * after locking both vnodes and retrying if it doesn't 1701 1.27 chs */ 1702 1.27 chs error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1703 1.27 chs return (error); 1704 1.27 chs } else { 1705 1.27 chs error = vn_lock(vp, lkflags); 1706 1.27 chs if (error != 0) 1707 1.27 chs vrele(vp); 1708 1.27 chs return (error); 1709 1.27 chs } 1710 1.27 chs } 1711 1.27 chs 1712 1.27 chs /* 1713 1.27 chs * Lookup an entry in a directory, or an extended attribute directory. 1714 1.27 chs * If it exists, return a held vnode reference for it. 1715 1.27 chs * 1716 1.27 chs * IN: dvp - vnode of directory to search. 1717 1.27 chs * nm - name of entry to lookup. 1718 1.27 chs * pnp - full pathname to lookup [UNUSED]. 1719 1.27 chs * flags - LOOKUP_XATTR set if looking for an attribute. 1720 1.27 chs * rdir - root directory vnode [UNUSED]. 1721 1.27 chs * cr - credentials of caller. 1722 1.27 chs * ct - caller context 1723 1.27 chs * 1724 1.27 chs * OUT: vpp - vnode of located entry, NULL if not found. 1725 1.27 chs * 1726 1.27 chs * RETURN: 0 on success, error code on failure. 
1727 1.27 chs * 1728 1.27 chs * Timestamps: 1729 1.27 chs * NA 1730 1.27 chs */ 1731 1.27 chs /* ARGSUSED */ 1732 1.27 chs static int 1733 1.27 chs zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1734 1.27 chs int nameiop, cred_t *cr, kthread_t *td, int flags) 1735 1.27 chs { 1736 1.27 chs znode_t *zdp = VTOZ(dvp); 1737 1.27 chs znode_t *zp; 1738 1.27 chs zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1739 1.27 chs int error = 0; 1740 1.27 chs 1741 1.27 chs /* fast path (should be redundant with vfs namecache) */ 1742 1.27 chs if (!(flags & LOOKUP_XATTR)) { 1743 1.27 chs if (dvp->v_type != VDIR) { 1744 1.27 chs return (SET_ERROR(ENOTDIR)); 1745 1.27 chs } else if (zdp->z_sa_hdl == NULL) { 1746 1.27 chs return (SET_ERROR(EIO)); 1747 1.27 chs } 1748 1.27 chs } 1749 1.27 chs 1750 1.27 chs DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1751 1.27 chs 1752 1.27 chs ZFS_ENTER(zfsvfs); 1753 1.27 chs ZFS_VERIFY_ZP(zdp); 1754 1.27 chs 1755 1.27 chs *vpp = NULL; 1756 1.27 chs 1757 1.27 chs if (flags & LOOKUP_XATTR) { 1758 1.27 chs #ifdef TODO 1759 1.27 chs /* 1760 1.27 chs * If the xattr property is off, refuse the lookup request. 1761 1.27 chs */ 1762 1.27 chs if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1763 1.27 chs ZFS_EXIT(zfsvfs); 1764 1.27 chs return (SET_ERROR(EINVAL)); 1765 1.27 chs } 1766 1.27 chs #endif 1767 1.27 chs 1768 1.27 chs /* 1769 1.27 chs * We don't allow recursive attributes.. 1770 1.27 chs * Maybe someday we will. 1771 1.27 chs */ 1772 1.27 chs if (zdp->z_pflags & ZFS_XATTR) { 1773 1.27 chs ZFS_EXIT(zfsvfs); 1774 1.27 chs return (SET_ERROR(EINVAL)); 1775 1.27 chs } 1776 1.27 chs 1777 1.27 chs if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1778 1.27 chs ZFS_EXIT(zfsvfs); 1779 1.27 chs return (error); 1780 1.27 chs } 1781 1.27 chs 1782 1.27 chs /* 1783 1.27 chs * Do we have permission to get into attribute directory? 
1784 1.27 chs */ 1785 1.27 chs if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1786 1.27 chs B_FALSE, cr)) { 1787 1.27 chs vrele(*vpp); 1788 1.27 chs *vpp = NULL; 1789 1.27 chs } 1790 1.27 chs 1791 1.27 chs ZFS_EXIT(zfsvfs); 1792 1.27 chs return (error); 1793 1.27 chs } 1794 1.27 chs 1795 1.27 chs /* 1796 1.27 chs * Check accessibility of directory. 1797 1.27 chs */ 1798 1.27 chs if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1799 1.27 chs ZFS_EXIT(zfsvfs); 1800 1.27 chs return (error); 1801 1.27 chs } 1802 1.27 chs 1803 1.27 chs if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1804 1.27 chs NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1805 1.27 chs ZFS_EXIT(zfsvfs); 1806 1.27 chs return (SET_ERROR(EILSEQ)); 1807 1.27 chs } 1808 1.27 chs 1809 1.27 chs 1810 1.27 chs /* 1811 1.27 chs * First handle the special cases. 1812 1.27 chs */ 1813 1.27 chs if ((cnp->cn_flags & ISDOTDOT) != 0) { 1814 1.27 chs /* 1815 1.27 chs * If we are a snapshot mounted under .zfs, return 1816 1.27 chs * the vp for the snapshot directory. 
1817 1.27 chs */ 1818 1.27 chs if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1819 1.27 chs struct componentname cn; 1820 1.27 chs vnode_t *zfsctl_vp; 1821 1.27 chs int ltype; 1822 1.27 chs 1823 1.27 chs ZFS_EXIT(zfsvfs); 1824 1.27 chs ltype = VOP_ISLOCKED(dvp); 1825 1.27 chs VOP_UNLOCK(dvp, 0); 1826 1.27 chs error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1827 1.27 chs &zfsctl_vp); 1828 1.27 chs if (error == 0) { 1829 1.27 chs cn.cn_nameptr = "snapshot"; 1830 1.27 chs cn.cn_namelen = strlen(cn.cn_nameptr); 1831 1.27 chs cn.cn_nameiop = cnp->cn_nameiop; 1832 1.27 chs cn.cn_flags = cnp->cn_flags; 1833 1.27 chs cn.cn_lkflags = cnp->cn_lkflags; 1834 1.27 chs error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1835 1.27 chs vput(zfsctl_vp); 1836 1.27 chs } 1837 1.27 chs vn_lock(dvp, ltype | LK_RETRY); 1838 1.27 chs return (error); 1839 1.27 chs } 1840 1.27 chs } 1841 1.27 chs if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1842 1.27 chs ZFS_EXIT(zfsvfs); 1843 1.27 chs if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1844 1.27 chs return (SET_ERROR(ENOTSUP)); 1845 1.27 chs error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1846 1.27 chs return (error); 1847 1.27 chs } 1848 1.27 chs 1849 1.27 chs /* 1850 1.27 chs * The loop is retry the lookup if the parent-child relationship 1851 1.27 chs * changes during the dot-dot locking complexities. 1852 1.27 chs */ 1853 1.27 chs for (;;) { 1854 1.27 chs uint64_t parent; 1855 1.27 chs 1856 1.27 chs error = zfs_dirlook(zdp, nm, &zp); 1857 1.27 chs if (error == 0) 1858 1.27 chs *vpp = ZTOV(zp); 1859 1.27 chs 1860 1.27 chs ZFS_EXIT(zfsvfs); 1861 1.27 chs if (error != 0) 1862 1.27 chs break; 1863 1.27 chs 1864 1.27 chs error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1865 1.27 chs if (error != 0) { 1866 1.27 chs /* 1867 1.27 chs * If we've got a locking error, then the vnode 1868 1.27 chs * got reclaimed because of a force unmount. 
1869 1.27 chs * We never enter doomed vnodes into the name cache. 1870 1.27 chs */ 1871 1.27 chs *vpp = NULL; 1872 1.27 chs return (error); 1873 1.27 chs } 1874 1.27 chs 1875 1.27 chs if ((cnp->cn_flags & ISDOTDOT) == 0) 1876 1.27 chs break; 1877 1.27 chs 1878 1.27 chs ZFS_ENTER(zfsvfs); 1879 1.27 chs if (zdp->z_sa_hdl == NULL) { 1880 1.27 chs error = SET_ERROR(EIO); 1881 1.27 chs } else { 1882 1.27 chs error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1883 1.27 chs &parent, sizeof (parent)); 1884 1.27 chs } 1885 1.27 chs if (error != 0) { 1886 1.27 chs ZFS_EXIT(zfsvfs); 1887 1.27 chs vput(ZTOV(zp)); 1888 1.27 chs break; 1889 1.27 chs } 1890 1.27 chs if (zp->z_id == parent) { 1891 1.27 chs ZFS_EXIT(zfsvfs); 1892 1.27 chs break; 1893 1.27 chs } 1894 1.27 chs vput(ZTOV(zp)); 1895 1.27 chs } 1896 1.27 chs 1897 1.27 chs out: 1898 1.27 chs if (error != 0) 1899 1.27 chs *vpp = NULL; 1900 1.27 chs 1901 1.27 chs /* Translate errors and add SAVENAME when needed. */ 1902 1.27 chs if (cnp->cn_flags & ISLASTCN) { 1903 1.27 chs switch (nameiop) { 1904 1.27 chs case CREATE: 1905 1.27 chs case RENAME: 1906 1.27 chs if (error == ENOENT) { 1907 1.27 chs error = EJUSTRETURN; 1908 1.27 chs cnp->cn_flags |= SAVENAME; 1909 1.27 chs break; 1910 1.27 chs } 1911 1.27 chs /* FALLTHROUGH */ 1912 1.27 chs case DELETE: 1913 1.27 chs if (error == 0) 1914 1.27 chs cnp->cn_flags |= SAVENAME; 1915 1.27 chs break; 1916 1.27 chs } 1917 1.27 chs } 1918 1.27 chs 1919 1.27 chs /* Insert name into cache (as non-existent) if appropriate. */ 1920 1.27 chs if (zfsvfs->z_use_namecache && 1921 1.27 chs error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1922 1.27 chs cache_enter(dvp, NULL, cnp); 1923 1.27 chs 1924 1.27 chs /* Insert name into cache if appropriate. 
*/ 1925 1.27 chs if (zfsvfs->z_use_namecache && 1926 1.27 chs error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1927 1.27 chs if (!(cnp->cn_flags & ISLASTCN) || 1928 1.27 chs (nameiop != DELETE && nameiop != RENAME)) { 1929 1.27 chs cache_enter(dvp, *vpp, cnp); 1930 1.27 chs } 1931 1.27 chs } 1932 1.27 chs 1933 1.27 chs return (error); 1934 1.27 chs } 1935 1.27 chs #endif /* __FreeBSD__ */ 1936 1.27 chs 1937 1.27 chs #ifdef __NetBSD__ 1938 1.1 haad /* 1939 1.4 haad * If vnode is for a device return a specfs vnode instead. 1940 1.4 haad */ 1941 1.4 haad static int 1942 1.4 haad specvp_check(vnode_t **vpp, cred_t *cr) 1943 1.4 haad { 1944 1.4 haad int error = 0; 1945 1.4 haad 1946 1.4 haad if (IS_DEVVP(*vpp)) { 1947 1.4 haad struct vnode *svp; 1948 1.4 haad 1949 1.4 haad svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1950 1.4 haad VN_RELE(*vpp); 1951 1.4 haad if (svp == NULL) 1952 1.4 haad error = ENOSYS; 1953 1.4 haad *vpp = svp; 1954 1.4 haad } 1955 1.4 haad return (error); 1956 1.4 haad } 1957 1.4 haad 1958 1.4 haad /* 1959 1.1 haad * Lookup an entry in a directory, or an extended attribute directory. 1960 1.1 haad * If it exists, return a held vnode reference for it. 1961 1.1 haad * 1962 1.1 haad * IN: dvp - vnode of directory to search. 1963 1.1 haad * nm - name of entry to lookup. 1964 1.1 haad * pnp - full pathname to lookup [UNUSED]. 1965 1.1 haad * flags - LOOKUP_XATTR set if looking for an attribute. 1966 1.1 haad * rdir - root directory vnode [UNUSED]. 1967 1.1 haad * cr - credentials of caller. 1968 1.1 haad * ct - caller context 1969 1.1 haad * direntflags - directory lookup flags 1970 1.1 haad * realpnp - returned pathname. 1971 1.1 haad * 1972 1.1 haad * OUT: vpp - vnode of located entry, NULL if not found. 
1973 1.1 haad * 1974 1.1 haad * RETURN: 0 if success 1975 1.1 haad * error code if failure 1976 1.1 haad * 1977 1.1 haad * Timestamps: 1978 1.1 haad * NA 1979 1.1 haad */ 1980 1.1 haad /* ARGSUSED */ 1981 1.1 haad static int 1982 1.44 hannken zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, int flags, 1983 1.44 hannken struct componentname *cnp, int nameiop, cred_t *cr) 1984 1.1 haad { 1985 1.1 haad znode_t *zdp = VTOZ(dvp); 1986 1.27 chs znode_t *zp; 1987 1.1 haad zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1988 1.4 haad int error = 0; 1989 1.4 haad 1990 1.4 haad /* fast path */ 1991 1.27 chs if (!(flags & LOOKUP_XATTR)) { 1992 1.4 haad if (dvp->v_type != VDIR) { 1993 1.4 haad return (ENOTDIR); 1994 1.27 chs } else if (zdp->z_sa_hdl == NULL) { 1995 1.27 chs return (SET_ERROR(EIO)); 1996 1.4 haad } 1997 1.4 haad 1998 1.4 haad if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1999 1.4 haad error = zfs_fastaccesschk_execute(zdp, cr); 2000 1.4 haad if (!error) { 2001 1.4 haad *vpp = dvp; 2002 1.4 haad VN_HOLD(*vpp); 2003 1.4 haad return (0); 2004 1.4 haad } 2005 1.4 haad return (error); 2006 1.4 haad } else { 2007 1.4 haad vnode_t *tvp = dnlc_lookup(dvp, nm); 2008 1.4 haad 2009 1.4 haad if (tvp) { 2010 1.4 haad error = zfs_fastaccesschk_execute(zdp, cr); 2011 1.4 haad if (error) { 2012 1.4 haad VN_RELE(tvp); 2013 1.4 haad return (error); 2014 1.4 haad } 2015 1.4 haad if (tvp == DNLC_NO_VNODE) { 2016 1.4 haad VN_RELE(tvp); 2017 1.4 haad return (ENOENT); 2018 1.4 haad } else { 2019 1.4 haad *vpp = tvp; 2020 1.4 haad return (specvp_check(vpp, cr)); 2021 1.4 haad } 2022 1.4 haad } 2023 1.4 haad } 2024 1.4 haad } 2025 1.4 haad 2026 1.4 haad DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 2027 1.1 haad 2028 1.1 haad ZFS_ENTER(zfsvfs); 2029 1.1 haad ZFS_VERIFY_ZP(zdp); 2030 1.1 haad 2031 1.1 haad *vpp = NULL; 2032 1.12 riastrad 2033 1.1 haad if (flags & LOOKUP_XATTR) { 2034 1.2 haad #ifdef TODO 2035 1.1 haad /* 2036 1.1 haad * If the xattr property is off, 
refuse the lookup request. 2037 1.1 haad */ 2038 1.1 haad if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 2039 1.1 haad ZFS_EXIT(zfsvfs); 2040 1.1 haad return (EINVAL); 2041 1.1 haad } 2042 1.2 haad #endif 2043 1.1 haad 2044 1.1 haad /* 2045 1.1 haad * We don't allow recursive attributes.. 2046 1.1 haad * Maybe someday we will. 2047 1.1 haad */ 2048 1.27 chs if (zdp->z_pflags & ZFS_XATTR) { 2049 1.1 haad ZFS_EXIT(zfsvfs); 2050 1.1 haad return (EINVAL); 2051 1.1 haad } 2052 1.1 haad 2053 1.1 haad if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 2054 1.1 haad ZFS_EXIT(zfsvfs); 2055 1.1 haad return (error); 2056 1.1 haad } 2057 1.1 haad 2058 1.1 haad /* 2059 1.1 haad * Do we have permission to get into attribute directory? 2060 1.1 haad */ 2061 1.1 haad if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 2062 1.1 haad B_FALSE, cr)) { 2063 1.1 haad VN_RELE(*vpp); 2064 1.1 haad *vpp = NULL; 2065 1.1 haad } 2066 1.1 haad 2067 1.1 haad ZFS_EXIT(zfsvfs); 2068 1.1 haad return (error); 2069 1.1 haad } 2070 1.1 haad 2071 1.1 haad if (dvp->v_type != VDIR) { 2072 1.1 haad ZFS_EXIT(zfsvfs); 2073 1.1 haad return (ENOTDIR); 2074 1.1 haad } 2075 1.1 haad 2076 1.1 haad /* 2077 1.1 haad * Check accessibility of directory. 2078 1.1 haad */ 2079 1.2 haad if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 2080 1.2 haad ZFS_EXIT(zfsvfs); 2081 1.2 haad return (error); 2082 1.2 haad } 2083 1.2 haad 2084 1.1 haad if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 2085 1.1 haad NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2086 1.1 haad ZFS_EXIT(zfsvfs); 2087 1.1 haad return (EILSEQ); 2088 1.1 haad } 2089 1.1 haad 2090 1.44 hannken /* 2091 1.44 hannken * First handle the special cases. 2092 1.44 hannken */ 2093 1.44 hannken if ((cnp->cn_flags & ISDOTDOT) != 0) { 2094 1.44 hannken /* 2095 1.44 hannken * If we are a snapshot mounted under .zfs, return 2096 1.44 hannken * the vp for the snapshot directory. 
2097 1.44 hannken */ 2098 1.44 hannken if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 2099 1.44 hannken ZFS_EXIT(zfsvfs); 2100 1.44 hannken error = zfsctl_snapshot(zfsvfs->z_parent, vpp); 2101 1.44 hannken 2102 1.44 hannken return (error); 2103 1.44 hannken } 2104 1.44 hannken } 2105 1.44 hannken if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 2106 1.44 hannken ZFS_EXIT(zfsvfs); 2107 1.44 hannken if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 2108 1.44 hannken return (SET_ERROR(ENOTSUP)); 2109 1.44 hannken error = zfsctl_root(zfsvfs, vpp); 2110 1.44 hannken return (error); 2111 1.44 hannken } 2112 1.44 hannken 2113 1.27 chs error = zfs_dirlook(zdp, nm, &zp); 2114 1.27 chs if (error == 0) { 2115 1.27 chs *vpp = ZTOV(zp); 2116 1.4 haad error = specvp_check(vpp, cr); 2117 1.27 chs } 2118 1.1 haad 2119 1.1 haad ZFS_EXIT(zfsvfs); 2120 1.1 haad return (error); 2121 1.1 haad } 2122 1.27 chs #endif 2123 1.1 haad 2124 1.1 haad /* 2125 1.1 haad * Attempt to create a new entry in a directory. If the entry 2126 1.1 haad * already exists, truncate the file if permissible, else return 2127 1.1 haad * an error. Return the vp of the created or trunc'd file. 2128 1.1 haad * 2129 1.1 haad * IN: dvp - vnode of directory to put new file entry in. 2130 1.1 haad * name - name of new file entry. 2131 1.1 haad * vap - attributes of new file. 2132 1.1 haad * excl - flag indicating exclusive or non-exclusive mode. 2133 1.1 haad * mode - mode to open file with. 2134 1.1 haad * cr - credentials of caller. 2135 1.1 haad * flag - large file flag [UNUSED]. 2136 1.1 haad * ct - caller context 2137 1.27 chs * vsecp - ACL to be set 2138 1.1 haad * 2139 1.1 haad * OUT: vpp - vnode of created or trunc'd entry. 2140 1.1 haad * 2141 1.27 chs * RETURN: 0 on success, error code on failure. 
2142 1.1 haad * 2143 1.1 haad * Timestamps: 2144 1.1 haad * dvp - ctime|mtime updated if new entry created 2145 1.1 haad * vp - ctime|mtime always, atime if new 2146 1.1 haad */ 2147 1.1 haad 2148 1.1 haad /* ARGSUSED */ 2149 1.1 haad static int 2150 1.2 haad zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 2151 1.27 chs vnode_t **vpp, cred_t *cr, kthread_t *td) 2152 1.1 haad { 2153 1.1 haad znode_t *zp, *dzp = VTOZ(dvp); 2154 1.1 haad zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2155 1.1 haad zilog_t *zilog; 2156 1.1 haad objset_t *os; 2157 1.1 haad dmu_tx_t *tx; 2158 1.1 haad int error; 2159 1.27 chs ksid_t *ksid; 2160 1.27 chs uid_t uid; 2161 1.27 chs gid_t gid = crgetgid(cr); 2162 1.27 chs zfs_acl_ids_t acl_ids; 2163 1.27 chs boolean_t fuid_dirtied; 2164 1.2 haad void *vsecp = NULL; 2165 1.2 haad int flag = 0; 2166 1.27 chs uint64_t txtype; 2167 1.1 haad 2168 1.1 haad /* 2169 1.1 haad * If we have an ephemeral id, ACL, or XVATTR then 2170 1.1 haad * make sure file system is at proper version 2171 1.1 haad */ 2172 1.1 haad 2173 1.27 chs ksid = crgetsid(cr, KSID_OWNER); 2174 1.27 chs if (ksid) 2175 1.27 chs uid = ksid_getid(ksid); 2176 1.27 chs else 2177 1.27 chs uid = crgetuid(cr); 2178 1.27 chs 2179 1.1 haad if (zfsvfs->z_use_fuids == B_FALSE && 2180 1.1 haad (vsecp || (vap->va_mask & AT_XVATTR) || 2181 1.27 chs IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2182 1.27 chs return (SET_ERROR(EINVAL)); 2183 1.1 haad 2184 1.1 haad ZFS_ENTER(zfsvfs); 2185 1.1 haad ZFS_VERIFY_ZP(dzp); 2186 1.1 haad os = zfsvfs->z_os; 2187 1.1 haad zilog = zfsvfs->z_log; 2188 1.1 haad 2189 1.1 haad if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 2190 1.1 haad NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2191 1.1 haad ZFS_EXIT(zfsvfs); 2192 1.27 chs return (SET_ERROR(EILSEQ)); 2193 1.1 haad } 2194 1.1 haad 2195 1.1 haad if (vap->va_mask & AT_XVATTR) { 2196 1.27 chs if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2197 1.1 haad crgetuid(cr), cr, vap->va_type)) != 0) { 2198 1.1 
haad ZFS_EXIT(zfsvfs); 2199 1.1 haad return (error); 2200 1.1 haad } 2201 1.1 haad } 2202 1.27 chs 2203 1.1 haad *vpp = NULL; 2204 1.1 haad 2205 1.2 haad if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 2206 1.2 haad vap->va_mode &= ~S_ISVTX; 2207 1.1 haad 2208 1.27 chs error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 2209 1.27 chs if (error) { 2210 1.27 chs ZFS_EXIT(zfsvfs); 2211 1.27 chs return (error); 2212 1.27 chs } 2213 1.27 chs ASSERT3P(zp, ==, NULL); 2214 1.27 chs 2215 1.27 chs /* 2216 1.27 chs * Create a new file object and update the directory 2217 1.27 chs * to reference it. 2218 1.27 chs */ 2219 1.27 chs if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 2220 1.27 chs goto out; 2221 1.27 chs } 2222 1.1 haad 2223 1.27 chs /* 2224 1.27 chs * We only support the creation of regular files in 2225 1.27 chs * extended attribute directories. 2226 1.27 chs */ 2227 1.1 haad 2228 1.27 chs if ((dzp->z_pflags & ZFS_XATTR) && 2229 1.27 chs (vap->va_type != VREG)) { 2230 1.27 chs error = SET_ERROR(EINVAL); 2231 1.27 chs goto out; 2232 1.1 haad } 2233 1.1 haad 2234 1.27 chs if ((error = zfs_acl_ids_create(dzp, 0, vap, 2235 1.27 chs cr, vsecp, &acl_ids)) != 0) 2236 1.27 chs goto out; 2237 1.1 haad 2238 1.27 chs if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2239 1.27 chs zfs_acl_ids_free(&acl_ids); 2240 1.27 chs error = SET_ERROR(EDQUOT); 2241 1.27 chs goto out; 2242 1.27 chs } 2243 1.1 haad 2244 1.27 chs getnewvnode_reserve(1); 2245 1.4 haad 2246 1.27 chs tx = dmu_tx_create(os); 2247 1.4 haad 2248 1.27 chs dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2249 1.27 chs ZFS_SA_BASE_ATTR_SIZE); 2250 1.4 haad 2251 1.27 chs fuid_dirtied = zfsvfs->z_fuid_dirty; 2252 1.27 chs if (fuid_dirtied) 2253 1.27 chs zfs_fuid_txhold(zfsvfs, tx); 2254 1.27 chs dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 2255 1.27 chs dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 2256 1.27 chs if (!zfsvfs->z_use_sa && 2257 1.27 chs acl_ids.z_aclp->z_acl_bytes > 
ZFS_ACE_SPACE) { 2258 1.27 chs dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2259 1.27 chs 0, acl_ids.z_aclp->z_acl_bytes); 2260 1.27 chs } 2261 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 2262 1.27 chs if (error) { 2263 1.27 chs zfs_acl_ids_free(&acl_ids); 2264 1.27 chs dmu_tx_abort(tx); 2265 1.27 chs getnewvnode_drop_reserve(); 2266 1.27 chs ZFS_EXIT(zfsvfs); 2267 1.27 chs return (error); 2268 1.27 chs } 2269 1.27 chs zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2270 1.4 haad 2271 1.27 chs if (fuid_dirtied) 2272 1.27 chs zfs_fuid_sync(zfsvfs, tx); 2273 1.1 haad 2274 1.27 chs (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 2275 1.27 chs txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 2276 1.27 chs zfs_log_create(zilog, tx, txtype, dzp, zp, name, 2277 1.27 chs vsecp, acl_ids.z_fuidp, vap); 2278 1.27 chs zfs_acl_ids_free(&acl_ids); 2279 1.27 chs dmu_tx_commit(tx); 2280 1.1 haad 2281 1.27 chs getnewvnode_drop_reserve(); 2282 1.1 haad 2283 1.1 haad out: 2284 1.27 chs if (error == 0) { 2285 1.1 haad *vpp = ZTOV(zp); 2286 1.1 haad } 2287 1.1 haad 2288 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2289 1.27 chs zil_commit(zilog, 0); 2290 1.27 chs 2291 1.1 haad ZFS_EXIT(zfsvfs); 2292 1.1 haad return (error); 2293 1.1 haad } 2294 1.1 haad 2295 1.1 haad /* 2296 1.1 haad * Remove an entry from a directory. 2297 1.1 haad * 2298 1.1 haad * IN: dvp - vnode of directory to remove entry from. 2299 1.1 haad * name - name of entry to remove. 2300 1.1 haad * cr - credentials of caller. 2301 1.1 haad * ct - caller context 2302 1.1 haad * flags - case flags 2303 1.1 haad * 2304 1.27 chs * RETURN: 0 on success, error code on failure. 
2305 1.1 haad * 2306 1.1 haad * Timestamps: 2307 1.1 haad * dvp - ctime|mtime 2308 1.1 haad * vp - ctime (if nlink > 0) 2309 1.1 haad */ 2310 1.27 chs 2311 1.1 haad /*ARGSUSED*/ 2312 1.1 haad static int 2313 1.27 chs zfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2314 1.1 haad { 2315 1.27 chs znode_t *dzp = VTOZ(dvp); 2316 1.27 chs znode_t *zp = VTOZ(vp); 2317 1.27 chs znode_t *xzp; 2318 1.1 haad zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2319 1.1 haad zilog_t *zilog; 2320 1.1 haad uint64_t acl_obj, xattr_obj; 2321 1.27 chs uint64_t obj = 0; 2322 1.1 haad dmu_tx_t *tx; 2323 1.1 haad boolean_t unlinked, toobig = FALSE; 2324 1.1 haad uint64_t txtype; 2325 1.1 haad int error; 2326 1.1 haad 2327 1.1 haad ZFS_ENTER(zfsvfs); 2328 1.1 haad ZFS_VERIFY_ZP(dzp); 2329 1.27 chs ZFS_VERIFY_ZP(zp); 2330 1.1 haad zilog = zfsvfs->z_log; 2331 1.27 chs zp = VTOZ(vp); 2332 1.1 haad 2333 1.27 chs xattr_obj = 0; 2334 1.27 chs xzp = NULL; 2335 1.1 haad 2336 1.1 haad if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2337 1.1 haad goto out; 2338 1.1 haad } 2339 1.1 haad 2340 1.1 haad /* 2341 1.1 haad * Need to use rmdir for removing directories. 2342 1.1 haad */ 2343 1.1 haad if (vp->v_type == VDIR) { 2344 1.27 chs error = SET_ERROR(EPERM); 2345 1.1 haad goto out; 2346 1.1 haad } 2347 1.1 haad 2348 1.1 haad vnevent_remove(vp, dvp, name, ct); 2349 1.1 haad 2350 1.27 chs obj = zp->z_id; 2351 1.1 haad 2352 1.27 chs /* are there any extended attributes? */ 2353 1.27 chs error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2354 1.27 chs &xattr_obj, sizeof (xattr_obj)); 2355 1.27 chs if (error == 0 && xattr_obj) { 2356 1.27 chs error = zfs_zget(zfsvfs, xattr_obj, &xzp); 2357 1.27 chs ASSERT0(error); 2358 1.27 chs } 2359 1.1 haad 2360 1.1 haad /* 2361 1.1 haad * We may delete the znode now, or we may put it in the unlinked set; 2362 1.1 haad * it depends on whether we're the last link, and on whether there are 2363 1.1 haad * other holds on the vnode. 
So we dmu_tx_hold() the right things to 2364 1.1 haad * allow for either case. 2365 1.1 haad */ 2366 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 2367 1.1 haad dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2368 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2369 1.27 chs zfs_sa_upgrade_txholds(tx, zp); 2370 1.27 chs zfs_sa_upgrade_txholds(tx, dzp); 2371 1.27 chs 2372 1.27 chs if (xzp) { 2373 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2374 1.27 chs dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 2375 1.1 haad } 2376 1.1 haad 2377 1.1 haad /* charge as an update -- would be nice not to charge at all */ 2378 1.1 haad dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2379 1.1 haad 2380 1.27 chs /* 2381 1.27 chs * Mark this transaction as typically resulting in a net free of space 2382 1.27 chs */ 2383 1.27 chs dmu_tx_mark_netfree(tx); 2384 1.27 chs 2385 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 2386 1.1 haad if (error) { 2387 1.1 haad dmu_tx_abort(tx); 2388 1.1 haad ZFS_EXIT(zfsvfs); 2389 1.1 haad return (error); 2390 1.1 haad } 2391 1.1 haad 2392 1.1 haad /* 2393 1.1 haad * Remove the directory entry. 
2394 1.1 haad */ 2395 1.27 chs error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2396 1.1 haad 2397 1.1 haad if (error) { 2398 1.1 haad dmu_tx_commit(tx); 2399 1.1 haad goto out; 2400 1.1 haad } 2401 1.1 haad 2402 1.27 chs if (unlinked) { 2403 1.1 haad zfs_unlinked_add(zp, tx); 2404 1.27 chs vp->v_vflag |= VV_NOSYNC; 2405 1.1 haad } 2406 1.1 haad 2407 1.1 haad txtype = TX_REMOVE; 2408 1.27 chs zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2409 1.1 haad 2410 1.1 haad dmu_tx_commit(tx); 2411 1.1 haad out: 2412 1.1 haad 2413 1.27 chs if (xzp) 2414 1.27 chs vrele(ZTOV(xzp)); 2415 1.1 haad 2416 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2417 1.27 chs zil_commit(zilog, 0); 2418 1.1 haad 2419 1.1 haad ZFS_EXIT(zfsvfs); 2420 1.1 haad return (error); 2421 1.1 haad } 2422 1.1 haad 2423 1.1 haad /* 2424 1.1 haad * Create a new directory and insert it into dvp using the name 2425 1.1 haad * provided. Return a pointer to the inserted directory. 2426 1.1 haad * 2427 1.1 haad * IN: dvp - vnode of directory to add subdir to. 2428 1.1 haad * dirname - name of new directory. 2429 1.1 haad * vap - attributes of new directory. 2430 1.1 haad * cr - credentials of caller. 2431 1.1 haad * ct - caller context 2432 1.27 chs * flags - case flags 2433 1.1 haad * vsecp - ACL to be set 2434 1.1 haad * 2435 1.1 haad * OUT: vpp - vnode of created directory. 2436 1.1 haad * 2437 1.27 chs * RETURN: 0 on success, error code on failure. 
2438 1.1 haad * 2439 1.1 haad * Timestamps: 2440 1.1 haad * dvp - ctime|mtime updated 2441 1.1 haad * vp - ctime|mtime|atime updated 2442 1.1 haad */ 2443 1.1 haad /*ARGSUSED*/ 2444 1.1 haad static int 2445 1.27 chs zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2446 1.1 haad { 2447 1.1 haad znode_t *zp, *dzp = VTOZ(dvp); 2448 1.1 haad zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2449 1.1 haad zilog_t *zilog; 2450 1.1 haad uint64_t txtype; 2451 1.1 haad dmu_tx_t *tx; 2452 1.1 haad int error; 2453 1.27 chs ksid_t *ksid; 2454 1.27 chs uid_t uid; 2455 1.27 chs gid_t gid = crgetgid(cr); 2456 1.27 chs zfs_acl_ids_t acl_ids; 2457 1.4 haad boolean_t fuid_dirtied; 2458 1.1 haad 2459 1.1 haad ASSERT(vap->va_type == VDIR); 2460 1.1 haad 2461 1.1 haad /* 2462 1.1 haad * If we have an ephemeral id, ACL, or XVATTR then 2463 1.1 haad * make sure file system is at proper version 2464 1.1 haad */ 2465 1.1 haad 2466 1.27 chs ksid = crgetsid(cr, KSID_OWNER); 2467 1.27 chs if (ksid) 2468 1.27 chs uid = ksid_getid(ksid); 2469 1.27 chs else 2470 1.27 chs uid = crgetuid(cr); 2471 1.1 haad if (zfsvfs->z_use_fuids == B_FALSE && 2472 1.27 chs ((vap->va_mask & AT_XVATTR) || 2473 1.27 chs IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2474 1.27 chs return (SET_ERROR(EINVAL)); 2475 1.1 haad 2476 1.1 haad ZFS_ENTER(zfsvfs); 2477 1.1 haad ZFS_VERIFY_ZP(dzp); 2478 1.1 haad zilog = zfsvfs->z_log; 2479 1.1 haad 2480 1.27 chs if (dzp->z_pflags & ZFS_XATTR) { 2481 1.1 haad ZFS_EXIT(zfsvfs); 2482 1.27 chs return (SET_ERROR(EINVAL)); 2483 1.1 haad } 2484 1.1 haad 2485 1.1 haad if (zfsvfs->z_utf8 && u8_validate(dirname, 2486 1.1 haad strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2487 1.1 haad ZFS_EXIT(zfsvfs); 2488 1.27 chs return (SET_ERROR(EILSEQ)); 2489 1.1 haad } 2490 1.1 haad 2491 1.27 chs if (vap->va_mask & AT_XVATTR) { 2492 1.27 chs if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2493 1.1 haad crgetuid(cr), cr, vap->va_type)) != 0) { 2494 1.1 haad 
ZFS_EXIT(zfsvfs); 2495 1.1 haad return (error); 2496 1.1 haad } 2497 1.27 chs } 2498 1.27 chs 2499 1.27 chs if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2500 1.27 chs NULL, &acl_ids)) != 0) { 2501 1.27 chs ZFS_EXIT(zfsvfs); 2502 1.27 chs return (error); 2503 1.27 chs } 2504 1.1 haad 2505 1.1 haad /* 2506 1.1 haad * First make sure the new directory doesn't exist. 2507 1.27 chs * 2508 1.27 chs * Existence is checked first to make sure we don't return 2509 1.27 chs * EACCES instead of EEXIST which can cause some applications 2510 1.27 chs * to fail. 2511 1.1 haad */ 2512 1.1 haad *vpp = NULL; 2513 1.1 haad 2514 1.27 chs if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2515 1.27 chs zfs_acl_ids_free(&acl_ids); 2516 1.1 haad ZFS_EXIT(zfsvfs); 2517 1.1 haad return (error); 2518 1.1 haad } 2519 1.27 chs ASSERT3P(zp, ==, NULL); 2520 1.1 haad 2521 1.1 haad if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2522 1.27 chs zfs_acl_ids_free(&acl_ids); 2523 1.1 haad ZFS_EXIT(zfsvfs); 2524 1.1 haad return (error); 2525 1.1 haad } 2526 1.1 haad 2527 1.4 haad if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2528 1.4 haad zfs_acl_ids_free(&acl_ids); 2529 1.4 haad ZFS_EXIT(zfsvfs); 2530 1.27 chs return (SET_ERROR(EDQUOT)); 2531 1.1 haad } 2532 1.4 haad 2533 1.1 haad /* 2534 1.1 haad * Add a new entry to the directory. 
2535 1.1 haad */ 2536 1.27 chs getnewvnode_reserve(1); 2537 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 2538 1.1 haad dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2539 1.1 haad dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2540 1.4 haad fuid_dirtied = zfsvfs->z_fuid_dirty; 2541 1.4 haad if (fuid_dirtied) 2542 1.4 haad zfs_fuid_txhold(zfsvfs, tx); 2543 1.27 chs if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2544 1.27 chs dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2545 1.27 chs acl_ids.z_aclp->z_acl_bytes); 2546 1.27 chs } 2547 1.27 chs 2548 1.27 chs dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2549 1.27 chs ZFS_SA_BASE_ATTR_SIZE); 2550 1.27 chs 2551 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 2552 1.1 haad if (error) { 2553 1.4 haad zfs_acl_ids_free(&acl_ids); 2554 1.1 haad dmu_tx_abort(tx); 2555 1.27 chs getnewvnode_drop_reserve(); 2556 1.1 haad ZFS_EXIT(zfsvfs); 2557 1.1 haad return (error); 2558 1.1 haad } 2559 1.1 haad 2560 1.1 haad /* 2561 1.1 haad * Create new node. 2562 1.1 haad */ 2563 1.27 chs zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2564 1.1 haad 2565 1.4 haad if (fuid_dirtied) 2566 1.4 haad zfs_fuid_sync(zfsvfs, tx); 2567 1.27 chs 2568 1.1 haad /* 2569 1.1 haad * Now put new name in parent dir. 
2570 1.1 haad */ 2571 1.27 chs (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2572 1.1 haad 2573 1.1 haad *vpp = ZTOV(zp); 2574 1.1 haad 2575 1.27 chs txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2576 1.27 chs zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2577 1.4 haad acl_ids.z_fuidp, vap); 2578 1.1 haad 2579 1.4 haad zfs_acl_ids_free(&acl_ids); 2580 1.27 chs 2581 1.1 haad dmu_tx_commit(tx); 2582 1.1 haad 2583 1.27 chs getnewvnode_drop_reserve(); 2584 1.27 chs 2585 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2586 1.27 chs zil_commit(zilog, 0); 2587 1.1 haad 2588 1.1 haad ZFS_EXIT(zfsvfs); 2589 1.1 haad return (0); 2590 1.1 haad } 2591 1.1 haad 2592 1.1 haad /* 2593 1.1 haad * Remove a directory subdir entry. If the current working 2594 1.1 haad * directory is the same as the subdir to be removed, the 2595 1.1 haad * remove will fail. 2596 1.1 haad * 2597 1.1 haad * IN: dvp - vnode of directory to remove from. 2598 1.1 haad * name - name of directory to be removed. 2599 1.1 haad * cwd - vnode of current working directory. 2600 1.1 haad * cr - credentials of caller. 2601 1.1 haad * ct - caller context 2602 1.1 haad * flags - case flags 2603 1.1 haad * 2604 1.27 chs * RETURN: 0 on success, error code on failure. 
2605 1.1 haad * 2606 1.1 haad * Timestamps: 2607 1.1 haad * dvp - ctime|mtime updated 2608 1.1 haad */ 2609 1.1 haad /*ARGSUSED*/ 2610 1.1 haad static int 2611 1.27 chs zfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2612 1.1 haad { 2613 1.1 haad znode_t *dzp = VTOZ(dvp); 2614 1.27 chs znode_t *zp = VTOZ(vp); 2615 1.1 haad zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2616 1.1 haad zilog_t *zilog; 2617 1.1 haad dmu_tx_t *tx; 2618 1.1 haad int error; 2619 1.1 haad 2620 1.1 haad ZFS_ENTER(zfsvfs); 2621 1.1 haad ZFS_VERIFY_ZP(dzp); 2622 1.27 chs ZFS_VERIFY_ZP(zp); 2623 1.1 haad zilog = zfsvfs->z_log; 2624 1.1 haad 2625 1.1 haad 2626 1.1 haad if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2627 1.1 haad goto out; 2628 1.1 haad } 2629 1.1 haad 2630 1.1 haad if (vp->v_type != VDIR) { 2631 1.27 chs error = SET_ERROR(ENOTDIR); 2632 1.1 haad goto out; 2633 1.1 haad } 2634 1.1 haad 2635 1.1 haad vnevent_rmdir(vp, dvp, name, ct); 2636 1.1 haad 2637 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 2638 1.1 haad dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2639 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2640 1.1 haad dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2641 1.27 chs zfs_sa_upgrade_txholds(tx, zp); 2642 1.27 chs zfs_sa_upgrade_txholds(tx, dzp); 2643 1.27 chs dmu_tx_mark_netfree(tx); 2644 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 2645 1.1 haad if (error) { 2646 1.1 haad dmu_tx_abort(tx); 2647 1.1 haad ZFS_EXIT(zfsvfs); 2648 1.1 haad return (error); 2649 1.1 haad } 2650 1.1 haad 2651 1.2 haad cache_purge(dvp); 2652 1.2 haad 2653 1.27 chs error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2654 1.1 haad 2655 1.1 haad if (error == 0) { 2656 1.1 haad uint64_t txtype = TX_RMDIR; 2657 1.27 chs zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2658 1.1 haad } 2659 1.1 haad 2660 1.1 haad dmu_tx_commit(tx); 2661 1.1 haad 2662 1.27 chs cache_purge(vp); 2663 1.1 haad out: 2664 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2665 1.27 chs 
zil_commit(zilog, 0); 2666 1.1 haad 2667 1.1 haad ZFS_EXIT(zfsvfs); 2668 1.1 haad return (error); 2669 1.1 haad } 2670 1.1 haad 2671 1.1 haad /* 2672 1.1 haad * Read as many directory entries as will fit into the provided 2673 1.1 haad * buffer from the given directory cursor position (specified in 2674 1.27 chs * the uio structure). 2675 1.1 haad * 2676 1.1 haad * IN: vp - vnode of directory to read. 2677 1.1 haad * uio - structure supplying read location, range info, 2678 1.1 haad * and return buffer. 2679 1.1 haad * cr - credentials of caller. 2680 1.1 haad * ct - caller context 2681 1.1 haad * flags - case flags 2682 1.1 haad * 2683 1.1 haad * OUT: uio - updated offset and range, buffer filled. 2684 1.1 haad * eofp - set to true if end-of-file detected. 2685 1.1 haad * 2686 1.27 chs * RETURN: 0 on success, error code on failure. 2687 1.1 haad * 2688 1.1 haad * Timestamps: 2689 1.1 haad * vp - atime updated 2690 1.1 haad * 2691 1.1 haad * Note that the low 4 bits of the cookie returned by zap is always zero. 2692 1.1 haad * This allows us to use the low range for "special" directory entries: 2693 1.1 haad * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2694 1.1 haad * we use the offset 2 for the '.zfs' directory. 
2695 1.1 haad */ 2696 1.1 haad /* ARGSUSED */ 2697 1.1 haad static int 2698 1.27 chs zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, off_t **cookies) 2699 1.1 haad { 2700 1.1 haad znode_t *zp = VTOZ(vp); 2701 1.1 haad iovec_t *iovp; 2702 1.1 haad edirent_t *eodp; 2703 1.1 haad dirent64_t *odp; 2704 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2705 1.1 haad objset_t *os; 2706 1.1 haad caddr_t outbuf; 2707 1.1 haad size_t bufsize; 2708 1.1 haad zap_cursor_t zc; 2709 1.1 haad zap_attribute_t zap; 2710 1.1 haad uint_t bytes_wanted; 2711 1.1 haad uint64_t offset; /* must be unsigned; checks for < 1 */ 2712 1.27 chs uint64_t parent; 2713 1.1 haad int local_eof; 2714 1.1 haad int outcount; 2715 1.1 haad int error; 2716 1.1 haad uint8_t prefetch; 2717 1.1 haad boolean_t check_sysattrs; 2718 1.2 haad uint8_t type; 2719 1.27 chs int ncooks = 0; 2720 1.27 chs off_t *cooks = NULL; 2721 1.2 haad int flags = 0; 2722 1.27 chs #ifdef __FreeBSD__ 2723 1.27 chs boolean_t user = uio->uio_segflg != UIO_SYSSPACE; 2724 1.27 chs #endif 2725 1.27 chs #ifdef __NetBSD__ 2726 1.27 chs boolean_t user = !VMSPACE_IS_KERNEL_P(uio->uio_vmspace); 2727 1.27 chs #endif 2728 1.1 haad 2729 1.1 haad ZFS_ENTER(zfsvfs); 2730 1.1 haad ZFS_VERIFY_ZP(zp); 2731 1.1 haad 2732 1.27 chs if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2733 1.27 chs &parent, sizeof (parent))) != 0) { 2734 1.27 chs ZFS_EXIT(zfsvfs); 2735 1.27 chs return (error); 2736 1.27 chs } 2737 1.27 chs 2738 1.1 haad /* 2739 1.1 haad * If we are not given an eof variable, 2740 1.1 haad * use a local one. 2741 1.1 haad */ 2742 1.1 haad if (eofp == NULL) 2743 1.1 haad eofp = &local_eof; 2744 1.1 haad 2745 1.1 haad /* 2746 1.1 haad * Check for valid iov_len. 
2747 1.1 haad */ 2748 1.1 haad if (uio->uio_iov->iov_len <= 0) { 2749 1.1 haad ZFS_EXIT(zfsvfs); 2750 1.27 chs return (SET_ERROR(EINVAL)); 2751 1.1 haad } 2752 1.1 haad 2753 1.1 haad /* 2754 1.1 haad * Quit if directory has been removed (posix) 2755 1.1 haad */ 2756 1.1 haad if ((*eofp = zp->z_unlinked) != 0) { 2757 1.1 haad ZFS_EXIT(zfsvfs); 2758 1.1 haad return (0); 2759 1.1 haad } 2760 1.1 haad 2761 1.1 haad error = 0; 2762 1.1 haad os = zfsvfs->z_os; 2763 1.1 haad offset = uio->uio_loffset; 2764 1.1 haad prefetch = zp->z_zn_prefetch; 2765 1.27 chs 2766 1.1 haad /* 2767 1.1 haad * Initialize the iterator cursor. 2768 1.1 haad */ 2769 1.1 haad if (offset <= 3) { 2770 1.1 haad /* 2771 1.1 haad * Start iteration from the beginning of the directory. 2772 1.1 haad */ 2773 1.1 haad zap_cursor_init(&zc, os, zp->z_id); 2774 1.1 haad } else { 2775 1.1 haad /* 2776 1.1 haad * The offset is a serialized cursor. 2777 1.1 haad */ 2778 1.1 haad zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2779 1.1 haad } 2780 1.1 haad 2781 1.1 haad /* 2782 1.1 haad * Get space to change directory entries into fs independent format. 2783 1.1 haad */ 2784 1.1 haad iovp = uio->uio_iov; 2785 1.1 haad bytes_wanted = iovp->iov_len; 2786 1.27 chs if (user || uio->uio_iovcnt != 1) { 2787 1.1 haad bufsize = bytes_wanted; 2788 1.1 haad outbuf = kmem_alloc(bufsize, KM_SLEEP); 2789 1.1 haad odp = (struct dirent64 *)outbuf; 2790 1.1 haad } else { 2791 1.1 haad bufsize = bytes_wanted; 2792 1.27 chs outbuf = NULL; 2793 1.1 haad odp = (struct dirent64 *)iovp->iov_base; 2794 1.1 haad } 2795 1.1 haad eodp = (struct edirent *)odp; 2796 1.1 haad 2797 1.2 haad if (ncookies != NULL) { 2798 1.2 haad /* 2799 1.2 haad * Minimum entry size is dirent size and 1 byte for a file name. 
2800 1.2 haad */ 2801 1.27 chs #ifdef __FreeBSD__ 2802 1.27 chs ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2803 1.27 chs cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2804 1.27 chs #endif 2805 1.27 chs #ifdef __NetBSD__ 2806 1.2 haad ncooks = uio->uio_resid / _DIRENT_MINSIZE(odp); 2807 1.45 hannken cooks = malloc(ncooks * sizeof(off_t), M_TEMP, M_WAITOK); 2808 1.27 chs #endif 2809 1.2 haad *cookies = cooks; 2810 1.2 haad *ncookies = ncooks; 2811 1.2 haad } 2812 1.2 haad 2813 1.1 haad /* 2814 1.1 haad * If this VFS supports the system attribute view interface; and 2815 1.1 haad * we're looking at an extended attribute directory; and we care 2816 1.1 haad * about normalization conflicts on this vfs; then we must check 2817 1.1 haad * for normalization conflicts with the sysattr name space. 2818 1.1 haad */ 2819 1.2 haad #ifdef TODO 2820 1.1 haad check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2821 1.1 haad (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2822 1.1 haad (flags & V_RDDIR_ENTFLAGS); 2823 1.2 haad #else 2824 1.2 haad check_sysattrs = 0; 2825 1.2 haad #endif 2826 1.1 haad 2827 1.1 haad /* 2828 1.1 haad * Transform to file-system independent format 2829 1.1 haad */ 2830 1.1 haad outcount = 0; 2831 1.1 haad while (outcount < bytes_wanted) { 2832 1.1 haad ino64_t objnum; 2833 1.1 haad ushort_t reclen; 2834 1.27 chs off64_t *next = NULL; 2835 1.1 haad 2836 1.1 haad /* 2837 1.1 haad * Special case `.', `..', and `.zfs'. 
2838 1.1 haad */ 2839 1.1 haad if (offset == 0) { 2840 1.1 haad (void) strcpy(zap.za_name, "."); 2841 1.1 haad zap.za_normalization_conflict = 0; 2842 1.1 haad objnum = zp->z_id; 2843 1.2 haad type = DT_DIR; 2844 1.1 haad } else if (offset == 1) { 2845 1.1 haad (void) strcpy(zap.za_name, ".."); 2846 1.1 haad zap.za_normalization_conflict = 0; 2847 1.27 chs objnum = parent; 2848 1.2 haad type = DT_DIR; 2849 1.1 haad } else if (offset == 2 && zfs_show_ctldir(zp)) { 2850 1.1 haad (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2851 1.1 haad zap.za_normalization_conflict = 0; 2852 1.1 haad objnum = ZFSCTL_INO_ROOT; 2853 1.2 haad type = DT_DIR; 2854 1.1 haad } else { 2855 1.1 haad /* 2856 1.1 haad * Grab next entry. 2857 1.1 haad */ 2858 1.1 haad if (error = zap_cursor_retrieve(&zc, &zap)) { 2859 1.1 haad if ((*eofp = (error == ENOENT)) != 0) 2860 1.1 haad break; 2861 1.1 haad else 2862 1.1 haad goto update; 2863 1.1 haad } 2864 1.1 haad 2865 1.1 haad if (zap.za_integer_length != 8 || 2866 1.1 haad zap.za_num_integers != 1) { 2867 1.1 haad cmn_err(CE_WARN, "zap_readdir: bad directory " 2868 1.1 haad "entry, obj = %lld, offset = %lld\n", 2869 1.1 haad (u_longlong_t)zp->z_id, 2870 1.1 haad (u_longlong_t)offset); 2871 1.27 chs error = SET_ERROR(ENXIO); 2872 1.1 haad goto update; 2873 1.1 haad } 2874 1.1 haad 2875 1.1 haad objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2876 1.1 haad /* 2877 1.1 haad * MacOS X can extract the object type here such as: 2878 1.1 haad * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2879 1.1 haad */ 2880 1.2 haad type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2881 1.1 haad 2882 1.1 haad if (check_sysattrs && !zap.za_normalization_conflict) { 2883 1.2 haad #ifdef TODO 2884 1.1 haad zap.za_normalization_conflict = 2885 1.1 haad xattr_sysattr_casechk(zap.za_name); 2886 1.2 haad #else 2887 1.2 haad panic("%s:%u: TODO", __func__, __LINE__); 2888 1.2 haad #endif 2889 1.1 haad } 2890 1.1 haad } 2891 1.1 haad 2892 1.4 haad if (flags & 
V_RDDIR_ACCFILTER) { 2893 1.4 haad /* 2894 1.4 haad * If we have no access at all, don't include 2895 1.4 haad * this entry in the returned information 2896 1.4 haad */ 2897 1.4 haad znode_t *ezp; 2898 1.4 haad if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2899 1.4 haad goto skip_entry; 2900 1.4 haad if (!zfs_has_access(ezp, cr)) { 2901 1.27 chs vrele(ZTOV(ezp)); 2902 1.4 haad goto skip_entry; 2903 1.4 haad } 2904 1.27 chs vrele(ZTOV(ezp)); 2905 1.4 haad } 2906 1.4 haad 2907 1.1 haad if (flags & V_RDDIR_ENTFLAGS) 2908 1.1 haad reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2909 1.1 haad else 2910 1.27 chs reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2911 1.1 haad 2912 1.1 haad /* 2913 1.1 haad * Will this entry fit in the buffer? 2914 1.1 haad */ 2915 1.1 haad if (outcount + reclen > bufsize) { 2916 1.1 haad /* 2917 1.1 haad * Did we manage to fit anything in the buffer? 2918 1.1 haad */ 2919 1.1 haad if (!outcount) { 2920 1.27 chs error = SET_ERROR(EINVAL); 2921 1.1 haad goto update; 2922 1.1 haad } 2923 1.1 haad break; 2924 1.1 haad } 2925 1.1 haad if (flags & V_RDDIR_ENTFLAGS) { 2926 1.1 haad /* 2927 1.1 haad * Add extended flag entry: 2928 1.1 haad */ 2929 1.1 haad eodp->ed_ino = objnum; 2930 1.1 haad eodp->ed_reclen = reclen; 2931 1.1 haad /* NOTE: ed_off is the offset for the *next* entry */ 2932 1.1 haad next = &(eodp->ed_off); 2933 1.1 haad eodp->ed_eflags = zap.za_normalization_conflict ? 
2934 1.1 haad ED_CASE_CONFLICT : 0; 2935 1.1 haad (void) strncpy(eodp->ed_name, zap.za_name, 2936 1.1 haad EDIRENT_NAMELEN(reclen)); 2937 1.1 haad eodp = (edirent_t *)((intptr_t)eodp + reclen); 2938 1.1 haad } else { 2939 1.1 haad /* 2940 1.1 haad * Add normal entry: 2941 1.1 haad */ 2942 1.1 haad odp->d_ino = objnum; 2943 1.1 haad odp->d_reclen = reclen; 2944 1.2 haad odp->d_namlen = strlen(zap.za_name); 2945 1.2 haad (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2946 1.2 haad odp->d_type = type; 2947 1.1 haad odp = (dirent64_t *)((intptr_t)odp + reclen); 2948 1.1 haad } 2949 1.1 haad outcount += reclen; 2950 1.1 haad 2951 1.27 chs ASSERT(outcount <= bufsize); 2952 1.1 haad 2953 1.1 haad /* Prefetch znode */ 2954 1.1 haad if (prefetch) 2955 1.27 chs dmu_prefetch(os, objnum, 0, 0, 0, 2956 1.27 chs ZIO_PRIORITY_SYNC_READ); 2957 1.1 haad 2958 1.4 haad skip_entry: 2959 1.1 haad /* 2960 1.1 haad * Move to the next entry, fill in the previous offset. 2961 1.1 haad */ 2962 1.1 haad if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2963 1.1 haad zap_cursor_advance(&zc); 2964 1.1 haad offset = zap_cursor_serialize(&zc); 2965 1.1 haad } else { 2966 1.1 haad offset += 1; 2967 1.1 haad } 2968 1.2 haad 2969 1.2 haad if (cooks != NULL) { 2970 1.2 haad *cooks++ = offset; 2971 1.2 haad ncooks--; 2972 1.27 chs #ifdef __FreeBSD__ 2973 1.27 chs KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2974 1.27 chs #endif 2975 1.27 chs #ifdef __NetBSD__ 2976 1.27 chs KASSERTMSG(ncooks >= 0, "ncooks=%d", ncooks); 2977 1.27 chs #endif 2978 1.2 haad } 2979 1.1 haad } 2980 1.1 haad zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2981 1.1 haad 2982 1.2 haad /* Subtract unused cookies */ 2983 1.2 haad if (ncookies != NULL) 2984 1.2 haad *ncookies -= ncooks; 2985 1.2 haad 2986 1.27 chs if (!user && uio->uio_iovcnt == 1) { 2987 1.1 haad iovp->iov_base += outcount; 2988 1.1 haad iovp->iov_len -= outcount; 2989 1.1 haad uio->uio_resid -= outcount; 2990 
1.27 chs } else if (error = uiomove(outbuf, (size_t)outcount, UIO_READ, uio)) { 2991 1.1 haad /* 2992 1.1 haad * Reset the pointer. 2993 1.1 haad */ 2994 1.1 haad offset = uio->uio_loffset; 2995 1.1 haad } 2996 1.1 haad 2997 1.1 haad update: 2998 1.1 haad zap_cursor_fini(&zc); 2999 1.27 chs if (user || uio->uio_iovcnt != 1) 3000 1.1 haad kmem_free(outbuf, bufsize); 3001 1.1 haad 3002 1.1 haad if (error == ENOENT) 3003 1.1 haad error = 0; 3004 1.1 haad 3005 1.1 haad ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3006 1.1 haad 3007 1.1 haad uio->uio_loffset = offset; 3008 1.1 haad ZFS_EXIT(zfsvfs); 3009 1.2 haad if (error != 0 && cookies != NULL) { 3010 1.27 chs #ifdef __FreeBSD__ 3011 1.27 chs free(*cookies, M_TEMP); 3012 1.27 chs #endif 3013 1.27 chs #ifdef __NetBSD__ 3014 1.27 chs kmem_free(*cookies, ncooks * sizeof(off_t)); 3015 1.27 chs #endif 3016 1.2 haad *cookies = NULL; 3017 1.2 haad *ncookies = 0; 3018 1.2 haad } 3019 1.1 haad return (error); 3020 1.1 haad } 3021 1.1 haad 3022 1.1 haad static int 3023 1.1 haad zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 3024 1.1 haad { 3025 1.1 haad znode_t *zp = VTOZ(vp); 3026 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3027 1.1 haad 3028 1.27 chs if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 3029 1.27 chs ZFS_ENTER(zfsvfs); 3030 1.27 chs ZFS_VERIFY_ZP(zp); 3031 1.2 haad 3032 1.27 chs #ifdef __NetBSD__ 3033 1.27 chs if (!zp->z_unlinked) 3034 1.27 chs #endif 3035 1.27 chs zil_commit(zfsvfs->z_log, zp->z_id); 3036 1.27 chs ZFS_EXIT(zfsvfs); 3037 1.2 haad } 3038 1.1 haad return (0); 3039 1.1 haad } 3040 1.1 haad 3041 1.1 haad 3042 1.1 haad /* 3043 1.1 haad * Get the requested file attributes and place them in the provided 3044 1.1 haad * vattr structure. 3045 1.1 haad * 3046 1.1 haad * IN: vp - vnode of file. 3047 1.1 haad * vap - va_mask identifies requested attributes. 
3048 1.1 haad * If AT_XVATTR set, then optional attrs are requested 3049 1.1 haad * flags - ATTR_NOACLCHECK (CIFS server context) 3050 1.1 haad * cr - credentials of caller. 3051 1.1 haad * ct - caller context 3052 1.1 haad * 3053 1.1 haad * OUT: vap - attribute values. 3054 1.1 haad * 3055 1.27 chs * RETURN: 0 (always succeeds). 3056 1.1 haad */ 3057 1.1 haad /* ARGSUSED */ 3058 1.1 haad static int 3059 1.1 haad zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 3060 1.1 haad caller_context_t *ct) 3061 1.1 haad { 3062 1.1 haad znode_t *zp = VTOZ(vp); 3063 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3064 1.1 haad int error = 0; 3065 1.2 haad uint32_t blksize; 3066 1.2 haad u_longlong_t nblocks; 3067 1.1 haad uint64_t links; 3068 1.27 chs uint64_t mtime[2], ctime[2], crtime[2], rdev; 3069 1.1 haad xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3070 1.1 haad xoptattr_t *xoap = NULL; 3071 1.1 haad boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3072 1.27 chs sa_bulk_attr_t bulk[4]; 3073 1.27 chs int count = 0; 3074 1.1 haad 3075 1.1 haad ZFS_ENTER(zfsvfs); 3076 1.1 haad ZFS_VERIFY_ZP(zp); 3077 1.27 chs 3078 1.27 chs zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 3079 1.27 chs 3080 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 3081 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 3082 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 3083 1.27 chs if (vp->v_type == VBLK || vp->v_type == VCHR) 3084 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 3085 1.27 chs &rdev, 8); 3086 1.27 chs 3087 1.27 chs if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 3088 1.27 chs ZFS_EXIT(zfsvfs); 3089 1.27 chs return (error); 3090 1.27 chs } 3091 1.1 haad 3092 1.1 haad /* 3093 1.1 haad * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 
3094 1.1 haad * Also, if we are the owner don't bother, since owner should 3095 1.1 haad * always be allowed to read basic attributes of file. 3096 1.1 haad */ 3097 1.27 chs if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 3098 1.27 chs (vap->va_uid != crgetuid(cr))) { 3099 1.1 haad if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 3100 1.1 haad skipaclchk, cr)) { 3101 1.1 haad ZFS_EXIT(zfsvfs); 3102 1.1 haad return (error); 3103 1.1 haad } 3104 1.1 haad } 3105 1.1 haad 3106 1.1 haad /* 3107 1.1 haad * Return all attributes. It's cheaper to provide the answer 3108 1.1 haad * than to determine whether we were asked the question. 3109 1.1 haad */ 3110 1.27 chs 3111 1.27 chs vap->va_type = IFTOVT(zp->z_mode); 3112 1.27 chs vap->va_mode = zp->z_mode & ~S_IFMT; 3113 1.27 chs #ifdef illumos 3114 1.27 chs vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 3115 1.27 chs #endif 3116 1.27 chs #ifdef __FreeBSD__ 3117 1.27 chs vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 3118 1.44 hannken vap->va_nodeid = zp->z_id; 3119 1.27 chs #endif 3120 1.27 chs #ifdef __NetBSD__ 3121 1.98 yamt /* 3122 1.98 yamt * note: f_fsid is a signed long. 3123 1.98 yamt * we don't want sign extension here. 3124 1.98 yamt */ 3125 1.98 yamt vap->va_fsid = (uint32_t)vp->v_mount->mnt_stat.f_fsid; 3126 1.44 hannken vap->va_nodeid = zp->z_id; 3127 1.44 hannken /* 3128 1.44 hannken * If we are a snapshot mounted under .zfs, return 3129 1.44 hannken * the object id of the snapshot to make getcwd happy. 
3130 1.44 hannken */ 3131 1.44 hannken if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 3132 1.44 hannken vnode_t *cvp = vp->v_mount->mnt_vnodecovered; 3133 1.44 hannken 3134 1.44 hannken if (cvp && zfsctl_is_node(cvp)) 3135 1.44 hannken vap->va_nodeid = dmu_objset_id(zfsvfs->z_os); 3136 1.44 hannken } 3137 1.27 chs #endif 3138 1.1 haad if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 3139 1.27 chs links = zp->z_links + 1; 3140 1.1 haad else 3141 1.27 chs links = zp->z_links; 3142 1.75 simonb /* XXX NetBSD: use LINK_MAX when that value matches 32-bit nlink_t */ 3143 1.75 simonb vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 3144 1.27 chs vap->va_size = zp->z_size; 3145 1.27 chs #ifdef illumos 3146 1.27 chs vap->va_rdev = vp->v_rdev; 3147 1.27 chs #else 3148 1.27 chs if (vp->v_type == VBLK || vp->v_type == VCHR) 3149 1.27 chs vap->va_rdev = zfs_cmpldev(rdev); 3150 1.27 chs #endif 3151 1.1 haad vap->va_seq = zp->z_seq; 3152 1.2 haad vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 3153 1.27 chs vap->va_filerev = zp->z_seq; 3154 1.1 haad 3155 1.1 haad /* 3156 1.1 haad * Add in any requested optional attributes and the create time. 3157 1.1 haad * Also set the corresponding bits in the returned attribute bitmap. 
3158 1.1 haad */ 3159 1.1 haad if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 3160 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 3161 1.1 haad xoap->xoa_archive = 3162 1.27 chs ((zp->z_pflags & ZFS_ARCHIVE) != 0); 3163 1.1 haad XVA_SET_RTN(xvap, XAT_ARCHIVE); 3164 1.1 haad } 3165 1.1 haad 3166 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 3167 1.1 haad xoap->xoa_readonly = 3168 1.27 chs ((zp->z_pflags & ZFS_READONLY) != 0); 3169 1.1 haad XVA_SET_RTN(xvap, XAT_READONLY); 3170 1.1 haad } 3171 1.1 haad 3172 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 3173 1.1 haad xoap->xoa_system = 3174 1.27 chs ((zp->z_pflags & ZFS_SYSTEM) != 0); 3175 1.1 haad XVA_SET_RTN(xvap, XAT_SYSTEM); 3176 1.1 haad } 3177 1.1 haad 3178 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 3179 1.1 haad xoap->xoa_hidden = 3180 1.27 chs ((zp->z_pflags & ZFS_HIDDEN) != 0); 3181 1.1 haad XVA_SET_RTN(xvap, XAT_HIDDEN); 3182 1.1 haad } 3183 1.1 haad 3184 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3185 1.1 haad xoap->xoa_nounlink = 3186 1.27 chs ((zp->z_pflags & ZFS_NOUNLINK) != 0); 3187 1.1 haad XVA_SET_RTN(xvap, XAT_NOUNLINK); 3188 1.1 haad } 3189 1.1 haad 3190 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3191 1.1 haad xoap->xoa_immutable = 3192 1.27 chs ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 3193 1.1 haad XVA_SET_RTN(xvap, XAT_IMMUTABLE); 3194 1.1 haad } 3195 1.1 haad 3196 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3197 1.1 haad xoap->xoa_appendonly = 3198 1.27 chs ((zp->z_pflags & ZFS_APPENDONLY) != 0); 3199 1.1 haad XVA_SET_RTN(xvap, XAT_APPENDONLY); 3200 1.1 haad } 3201 1.1 haad 3202 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3203 1.1 haad xoap->xoa_nodump = 3204 1.27 chs ((zp->z_pflags & ZFS_NODUMP) != 0); 3205 1.1 haad XVA_SET_RTN(xvap, XAT_NODUMP); 3206 1.1 haad } 3207 1.1 haad 3208 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 3209 1.1 haad xoap->xoa_opaque = 3210 1.27 chs ((zp->z_pflags & ZFS_OPAQUE) != 0); 3211 1.1 haad 
XVA_SET_RTN(xvap, XAT_OPAQUE); 3212 1.1 haad } 3213 1.1 haad 3214 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3215 1.1 haad xoap->xoa_av_quarantined = 3216 1.27 chs ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 3217 1.1 haad XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 3218 1.1 haad } 3219 1.1 haad 3220 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3221 1.1 haad xoap->xoa_av_modified = 3222 1.27 chs ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 3223 1.1 haad XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 3224 1.1 haad } 3225 1.1 haad 3226 1.1 haad if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 3227 1.27 chs vp->v_type == VREG) { 3228 1.27 chs zfs_sa_get_scanstamp(zp, xvap); 3229 1.27 chs } 3230 1.1 haad 3231 1.27 chs if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3232 1.27 chs xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 3233 1.27 chs XVA_SET_RTN(xvap, XAT_REPARSE); 3234 1.27 chs } 3235 1.27 chs if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 3236 1.27 chs xoap->xoa_generation = zp->z_gen; 3237 1.27 chs XVA_SET_RTN(xvap, XAT_GEN); 3238 1.1 haad } 3239 1.1 haad 3240 1.27 chs if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 3241 1.27 chs xoap->xoa_offline = 3242 1.27 chs ((zp->z_pflags & ZFS_OFFLINE) != 0); 3243 1.27 chs XVA_SET_RTN(xvap, XAT_OFFLINE); 3244 1.1 haad } 3245 1.4 haad 3246 1.27 chs if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 3247 1.27 chs xoap->xoa_sparse = 3248 1.27 chs ((zp->z_pflags & ZFS_SPARSE) != 0); 3249 1.27 chs XVA_SET_RTN(xvap, XAT_SPARSE); 3250 1.4 haad } 3251 1.1 haad } 3252 1.1 haad 3253 1.27 chs ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 3254 1.27 chs ZFS_TIME_DECODE(&vap->va_mtime, mtime); 3255 1.27 chs ZFS_TIME_DECODE(&vap->va_ctime, ctime); 3256 1.27 chs ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 3257 1.1 haad 3258 1.1 haad 3259 1.27 chs sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 3260 1.2 haad vap->va_blksize = blksize; 3261 1.2 haad vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 3262 1.1 haad 3263 1.1 haad if (zp->z_blksz == 0) { 3264 1.1 haad /* 
3265 1.1 haad * Block size hasn't been set; suggest maximal I/O transfers. 3266 1.1 haad */ 3267 1.1 haad vap->va_blksize = zfsvfs->z_max_blksz; 3268 1.1 haad } 3269 1.1 haad 3270 1.1 haad ZFS_EXIT(zfsvfs); 3271 1.1 haad return (0); 3272 1.1 haad } 3273 1.1 haad 3274 1.1 haad /* 3275 1.1 haad * Set the file attributes to the values contained in the 3276 1.1 haad * vattr structure. 3277 1.1 haad * 3278 1.1 haad * IN: vp - vnode of file to be modified. 3279 1.1 haad * vap - new attribute values. 3280 1.1 haad * If AT_XVATTR set, then optional attrs are being set 3281 1.1 haad * flags - ATTR_UTIME set if non-default time values provided. 3282 1.1 haad * - ATTR_NOACLCHECK (CIFS context only). 3283 1.1 haad * cr - credentials of caller. 3284 1.1 haad * ct - caller context 3285 1.1 haad * 3286 1.27 chs * RETURN: 0 on success, error code on failure. 3287 1.27 chs * 3288 1.1 haad * Timestamps: 3289 1.1 haad * vp - ctime updated, mtime updated if size changed. 3290 1.1 haad */ 3291 1.1 haad /* ARGSUSED */ 3292 1.1 haad static int 3293 1.1 haad zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 3294 1.27 chs caller_context_t *ct) 3295 1.1 haad { 3296 1.1 haad znode_t *zp = VTOZ(vp); 3297 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3298 1.1 haad zilog_t *zilog; 3299 1.1 haad dmu_tx_t *tx; 3300 1.1 haad vattr_t oldva; 3301 1.4 haad xvattr_t tmpxvattr; 3302 1.1 haad uint_t mask = vap->va_mask; 3303 1.27 chs uint_t saved_mask = 0; 3304 1.27 chs uint64_t saved_mode; 3305 1.1 haad int trim_mask = 0; 3306 1.1 haad uint64_t new_mode; 3307 1.4 haad uint64_t new_uid, new_gid; 3308 1.27 chs uint64_t xattr_obj; 3309 1.27 chs uint64_t mtime[2], ctime[2]; 3310 1.1 haad znode_t *attrzp; 3311 1.1 haad int need_policy = FALSE; 3312 1.27 chs int err, err2; 3313 1.1 haad zfs_fuid_info_t *fuidp = NULL; 3314 1.1 haad xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3315 1.1 haad xoptattr_t *xoap; 3316 1.27 chs zfs_acl_t *aclp; 3317 1.1 haad boolean_t skipaclchk = (flags 
& ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3318 1.27 chs boolean_t fuid_dirtied = B_FALSE; 3319 1.27 chs sa_bulk_attr_t bulk[7], xattr_bulk[7]; 3320 1.27 chs int count = 0, xattr_count = 0; 3321 1.2 haad 3322 1.1 haad if (mask == 0) 3323 1.1 haad return (0); 3324 1.1 haad 3325 1.1 haad if (mask & AT_NOSET) 3326 1.27 chs return (SET_ERROR(EINVAL)); 3327 1.1 haad 3328 1.1 haad ZFS_ENTER(zfsvfs); 3329 1.1 haad ZFS_VERIFY_ZP(zp); 3330 1.1 haad 3331 1.1 haad zilog = zfsvfs->z_log; 3332 1.1 haad 3333 1.1 haad /* 3334 1.1 haad * Make sure that if we have ephemeral uid/gid or xvattr specified 3335 1.1 haad * that file system is at proper version level 3336 1.1 haad */ 3337 1.1 haad 3338 1.1 haad if (zfsvfs->z_use_fuids == B_FALSE && 3339 1.1 haad (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 3340 1.1 haad ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 3341 1.1 haad (mask & AT_XVATTR))) { 3342 1.1 haad ZFS_EXIT(zfsvfs); 3343 1.27 chs return (SET_ERROR(EINVAL)); 3344 1.1 haad } 3345 1.1 haad 3346 1.1 haad if (mask & AT_SIZE && vp->v_type == VDIR) { 3347 1.1 haad ZFS_EXIT(zfsvfs); 3348 1.27 chs return (SET_ERROR(EISDIR)); 3349 1.1 haad } 3350 1.1 haad 3351 1.1 haad if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 3352 1.1 haad ZFS_EXIT(zfsvfs); 3353 1.27 chs return (SET_ERROR(EINVAL)); 3354 1.1 haad } 3355 1.1 haad 3356 1.1 haad /* 3357 1.1 haad * If this is an xvattr_t, then get a pointer to the structure of 3358 1.1 haad * optional attributes. If this is NULL, then we have a vattr_t. 
3359 1.1 haad */ 3360 1.1 haad xoap = xva_getxoptattr(xvap); 3361 1.1 haad 3362 1.4 haad xva_init(&tmpxvattr); 3363 1.4 haad 3364 1.1 haad /* 3365 1.1 haad * Immutable files can only alter immutable bit and atime 3366 1.1 haad */ 3367 1.27 chs if ((zp->z_pflags & ZFS_IMMUTABLE) && 3368 1.1 haad ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 3369 1.1 haad ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 3370 1.1 haad ZFS_EXIT(zfsvfs); 3371 1.27 chs return (SET_ERROR(EPERM)); 3372 1.1 haad } 3373 1.1 haad 3374 1.27 chs if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 3375 1.1 haad ZFS_EXIT(zfsvfs); 3376 1.27 chs return (SET_ERROR(EPERM)); 3377 1.1 haad } 3378 1.1 haad 3379 1.1 haad /* 3380 1.1 haad * Verify timestamps doesn't overflow 32 bits. 3381 1.1 haad * ZFS can handle large timestamps, but 32bit syscalls can't 3382 1.1 haad * handle times greater than 2039. This check should be removed 3383 1.1 haad * once large timestamps are fully supported. 3384 1.1 haad */ 3385 1.1 haad if (mask & (AT_ATIME | AT_MTIME)) { 3386 1.1 haad if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 3387 1.1 haad ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 3388 1.1 haad ZFS_EXIT(zfsvfs); 3389 1.27 chs return (SET_ERROR(EOVERFLOW)); 3390 1.1 haad } 3391 1.1 haad } 3392 1.27 chs if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 3393 1.27 chs TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 3394 1.27 chs ZFS_EXIT(zfsvfs); 3395 1.27 chs return (SET_ERROR(EOVERFLOW)); 3396 1.27 chs } 3397 1.1 haad 3398 1.1 haad attrzp = NULL; 3399 1.27 chs aclp = NULL; 3400 1.1 haad 3401 1.4 haad /* Can this be moved to before the top label? 
*/ 3402 1.1 haad if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3403 1.1 haad ZFS_EXIT(zfsvfs); 3404 1.27 chs return (SET_ERROR(EROFS)); 3405 1.1 haad } 3406 1.1 haad 3407 1.1 haad /* 3408 1.1 haad * First validate permissions 3409 1.1 haad */ 3410 1.27 chs 3411 1.1 haad if (mask & AT_SIZE) { 3412 1.1 haad /* 3413 1.1 haad * XXX - Note, we are not providing any open 3414 1.1 haad * mode flags here (like FNDELAY), so we may 3415 1.1 haad * block if there are locks present... this 3416 1.1 haad * should be addressed in openat(). 3417 1.1 haad */ 3418 1.1 haad /* XXX - would it be OK to generate a log record here? */ 3419 1.1 haad err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3420 1.1 haad if (err) { 3421 1.1 haad ZFS_EXIT(zfsvfs); 3422 1.1 haad return (err); 3423 1.1 haad } 3424 1.1 haad } 3425 1.27 chs 3426 1.1 haad if (mask & (AT_ATIME|AT_MTIME) || 3427 1.1 haad ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3428 1.1 haad XVA_ISSET_REQ(xvap, XAT_READONLY) || 3429 1.1 haad XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3430 1.27 chs XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3431 1.27 chs XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3432 1.1 haad XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3433 1.27 chs XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3434 1.1 haad need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3435 1.1 haad skipaclchk, cr); 3436 1.27 chs } 3437 1.1 haad 3438 1.1 haad if (mask & (AT_UID|AT_GID)) { 3439 1.1 haad int idmask = (mask & (AT_UID|AT_GID)); 3440 1.1 haad int take_owner; 3441 1.1 haad int take_group; 3442 1.1 haad 3443 1.1 haad /* 3444 1.1 haad * NOTE: even if a new mode is being set, 3445 1.1 haad * we may clear S_ISUID/S_ISGID bits. 
3446 1.1 haad */ 3447 1.1 haad 3448 1.1 haad if (!(mask & AT_MODE)) 3449 1.27 chs vap->va_mode = zp->z_mode; 3450 1.1 haad 3451 1.1 haad /* 3452 1.1 haad * Take ownership or chgrp to group we are a member of 3453 1.1 haad */ 3454 1.1 haad 3455 1.1 haad take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3456 1.1 haad take_group = (mask & AT_GID) && 3457 1.1 haad zfs_groupmember(zfsvfs, vap->va_gid, cr); 3458 1.1 haad 3459 1.1 haad /* 3460 1.1 haad * If both AT_UID and AT_GID are set then take_owner and 3461 1.1 haad * take_group must both be set in order to allow taking 3462 1.1 haad * ownership. 3463 1.1 haad * 3464 1.1 haad * Otherwise, send the check through secpolicy_vnode_setattr() 3465 1.1 haad * 3466 1.1 haad */ 3467 1.27 chs 3468 1.1 haad if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3469 1.1 haad ((idmask == AT_UID) && take_owner) || 3470 1.1 haad ((idmask == AT_GID) && take_group)) { 3471 1.1 haad if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3472 1.1 haad skipaclchk, cr) == 0) { 3473 1.1 haad /* 3474 1.1 haad * Remove setuid/setgid for non-privileged users 3475 1.1 haad */ 3476 1.27 chs secpolicy_setid_clear(vap, vp, cr); 3477 1.1 haad trim_mask = (mask & (AT_UID|AT_GID)); 3478 1.1 haad } else { 3479 1.1 haad need_policy = TRUE; 3480 1.1 haad } 3481 1.1 haad } else { 3482 1.1 haad need_policy = TRUE; 3483 1.1 haad } 3484 1.1 haad } 3485 1.1 haad 3486 1.27 chs oldva.va_mode = zp->z_mode; 3487 1.1 haad zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3488 1.1 haad if (mask & AT_XVATTR) { 3489 1.4 haad /* 3490 1.4 haad * Update xvattr mask to include only those attributes 3491 1.4 haad * that are actually changing. 3492 1.4 haad * 3493 1.4 haad * the bits will be restored prior to actually setting 3494 1.4 haad * the attributes so the caller thinks they were set. 
3495 1.4 haad */ 3496 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3497 1.4 haad if (xoap->xoa_appendonly != 3498 1.27 chs ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3499 1.4 haad need_policy = TRUE; 3500 1.4 haad } else { 3501 1.4 haad XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3502 1.4 haad XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3503 1.4 haad } 3504 1.4 haad } 3505 1.4 haad 3506 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3507 1.4 haad if (xoap->xoa_nounlink != 3508 1.27 chs ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3509 1.4 haad need_policy = TRUE; 3510 1.4 haad } else { 3511 1.4 haad XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3512 1.4 haad XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3513 1.4 haad } 3514 1.4 haad } 3515 1.4 haad 3516 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3517 1.4 haad if (xoap->xoa_immutable != 3518 1.27 chs ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3519 1.4 haad need_policy = TRUE; 3520 1.4 haad } else { 3521 1.4 haad XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3522 1.4 haad XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3523 1.4 haad } 3524 1.4 haad } 3525 1.4 haad 3526 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3527 1.4 haad if (xoap->xoa_nodump != 3528 1.27 chs ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3529 1.77 simonb #if 0 3530 1.77 simonb /* 3531 1.77 simonb * XXXSB - zfs_netbsd_setattr() 3532 1.77 simonb * has already checked if this 3533 1.77 simonb * request is authorised, and our 3534 1.77 simonb * secpolicy_xvattr() doesn't check 3535 1.77 simonb * kauth chflags. Fix this when we 3536 1.77 simonb * migrate to openzfs. 
3537 1.77 simonb */ 3538 1.4 haad need_policy = TRUE; 3539 1.77 simonb #endif 3540 1.4 haad } else { 3541 1.4 haad XVA_CLR_REQ(xvap, XAT_NODUMP); 3542 1.4 haad XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3543 1.4 haad } 3544 1.4 haad } 3545 1.4 haad 3546 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3547 1.4 haad if (xoap->xoa_av_modified != 3548 1.27 chs ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3549 1.4 haad need_policy = TRUE; 3550 1.4 haad } else { 3551 1.4 haad XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3552 1.4 haad XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3553 1.4 haad } 3554 1.4 haad } 3555 1.4 haad 3556 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3557 1.4 haad if ((vp->v_type != VREG && 3558 1.4 haad xoap->xoa_av_quarantined) || 3559 1.4 haad xoap->xoa_av_quarantined != 3560 1.27 chs ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3561 1.4 haad need_policy = TRUE; 3562 1.4 haad } else { 3563 1.4 haad XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3564 1.4 haad XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3565 1.4 haad } 3566 1.4 haad } 3567 1.4 haad 3568 1.4 haad if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3569 1.4 haad ZFS_EXIT(zfsvfs); 3570 1.27 chs return (SET_ERROR(EPERM)); 3571 1.4 haad } 3572 1.4 haad 3573 1.4 haad if (need_policy == FALSE && 3574 1.4 haad (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3575 1.4 haad XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3576 1.1 haad need_policy = TRUE; 3577 1.1 haad } 3578 1.1 haad } 3579 1.4 haad 3580 1.1 haad if (mask & AT_MODE) { 3581 1.1 haad if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3582 1.1 haad err = secpolicy_setid_setsticky_clear(vp, vap, 3583 1.1 haad &oldva, cr); 3584 1.1 haad if (err) { 3585 1.1 haad ZFS_EXIT(zfsvfs); 3586 1.1 haad return (err); 3587 1.1 haad } 3588 1.1 haad trim_mask |= AT_MODE; 3589 1.1 haad } else { 3590 1.1 haad need_policy = TRUE; 3591 1.1 haad } 3592 1.1 haad } 3593 1.1 haad 3594 1.1 haad if (need_policy) { 3595 1.1 haad /* 3596 1.1 haad * If trim_mask is set then take 
ownership 3597 1.1 haad * has been granted or write_acl is present and user 3598 1.1 haad * has the ability to modify mode. In that case remove 3599 1.1 haad * UID|GID and or MODE from mask so that 3600 1.1 haad * secpolicy_vnode_setattr() doesn't revoke it. 3601 1.1 haad */ 3602 1.1 haad 3603 1.1 haad if (trim_mask) { 3604 1.1 haad saved_mask = vap->va_mask; 3605 1.1 haad vap->va_mask &= ~trim_mask; 3606 1.27 chs if (trim_mask & AT_MODE) { 3607 1.27 chs /* 3608 1.27 chs * Save the mode, as secpolicy_vnode_setattr() 3609 1.27 chs * will overwrite it with ova.va_mode. 3610 1.27 chs */ 3611 1.27 chs saved_mode = vap->va_mode; 3612 1.27 chs } 3613 1.1 haad } 3614 1.1 haad err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3615 1.1 haad (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3616 1.1 haad if (err) { 3617 1.1 haad ZFS_EXIT(zfsvfs); 3618 1.1 haad return (err); 3619 1.1 haad } 3620 1.1 haad 3621 1.27 chs if (trim_mask) { 3622 1.1 haad vap->va_mask |= saved_mask; 3623 1.27 chs if (trim_mask & AT_MODE) { 3624 1.27 chs /* 3625 1.27 chs * Recover the mode after 3626 1.27 chs * secpolicy_vnode_setattr(). 
3627 1.27 chs */ 3628 1.27 chs vap->va_mode = saved_mode; 3629 1.27 chs } 3630 1.27 chs } 3631 1.1 haad } 3632 1.27 chs 3633 1.1 haad /* 3634 1.1 haad * secpolicy_vnode_setattr, or take ownership may have 3635 1.1 haad * changed va_mask 3636 1.1 haad */ 3637 1.1 haad mask = vap->va_mask; 3638 1.1 haad 3639 1.27 chs if ((mask & (AT_UID | AT_GID))) { 3640 1.27 chs err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3641 1.27 chs &xattr_obj, sizeof (xattr_obj)); 3642 1.27 chs 3643 1.27 chs if (err == 0 && xattr_obj) { 3644 1.27 chs err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3645 1.27 chs if (err == 0) { 3646 1.27 chs err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3647 1.27 chs if (err != 0) 3648 1.27 chs vrele(ZTOV(attrzp)); 3649 1.27 chs } 3650 1.27 chs if (err) 3651 1.27 chs goto out2; 3652 1.27 chs } 3653 1.27 chs if (mask & AT_UID) { 3654 1.27 chs new_uid = zfs_fuid_create(zfsvfs, 3655 1.27 chs (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3656 1.27 chs if (new_uid != zp->z_uid && 3657 1.27 chs zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3658 1.27 chs if (attrzp) 3659 1.27 chs vput(ZTOV(attrzp)); 3660 1.27 chs err = SET_ERROR(EDQUOT); 3661 1.27 chs goto out2; 3662 1.27 chs } 3663 1.27 chs } 3664 1.27 chs 3665 1.27 chs if (mask & AT_GID) { 3666 1.27 chs new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3667 1.27 chs cr, ZFS_GROUP, &fuidp); 3668 1.27 chs if (new_gid != zp->z_gid && 3669 1.27 chs zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3670 1.27 chs if (attrzp) 3671 1.27 chs vput(ZTOV(attrzp)); 3672 1.27 chs err = SET_ERROR(EDQUOT); 3673 1.27 chs goto out2; 3674 1.27 chs } 3675 1.27 chs } 3676 1.27 chs } 3677 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 3678 1.1 haad 3679 1.1 haad if (mask & AT_MODE) { 3680 1.27 chs uint64_t pmode = zp->z_mode; 3681 1.27 chs uint64_t acl_obj; 3682 1.27 chs new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3683 1.1 haad 3684 1.27 chs if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3685 1.27 chs 
!(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3686 1.27 chs err = SET_ERROR(EPERM); 3687 1.27 chs goto out; 3688 1.27 chs } 3689 1.1 haad 3690 1.4 haad if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3691 1.4 haad goto out; 3692 1.27 chs 3693 1.27 chs if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3694 1.27 chs /* 3695 1.27 chs * Are we upgrading ACL from old V0 format 3696 1.27 chs * to V1 format? 3697 1.27 chs */ 3698 1.27 chs if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3699 1.27 chs zfs_znode_acl_version(zp) == 3700 1.1 haad ZFS_ACL_VERSION_INITIAL) { 3701 1.27 chs dmu_tx_hold_free(tx, acl_obj, 0, 3702 1.1 haad DMU_OBJECT_END); 3703 1.1 haad dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3704 1.1 haad 0, aclp->z_acl_bytes); 3705 1.1 haad } else { 3706 1.27 chs dmu_tx_hold_write(tx, acl_obj, 0, 3707 1.1 haad aclp->z_acl_bytes); 3708 1.1 haad } 3709 1.27 chs } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3710 1.1 haad dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3711 1.1 haad 0, aclp->z_acl_bytes); 3712 1.1 haad } 3713 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3714 1.27 chs } else { 3715 1.27 chs if ((mask & AT_XVATTR) && 3716 1.27 chs XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3717 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3718 1.27 chs else 3719 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3720 1.27 chs } 3721 1.27 chs 3722 1.27 chs if (attrzp) { 3723 1.27 chs dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3724 1.1 haad } 3725 1.1 haad 3726 1.27 chs fuid_dirtied = zfsvfs->z_fuid_dirty; 3727 1.27 chs if (fuid_dirtied) 3728 1.27 chs zfs_fuid_txhold(zfsvfs, tx); 3729 1.4 haad 3730 1.27 chs zfs_sa_upgrade_txholds(tx, zp); 3731 1.1 haad 3732 1.27 chs err = dmu_tx_assign(tx, TXG_WAIT); 3733 1.27 chs if (err) 3734 1.4 haad goto out; 3735 1.1 haad 3736 1.27 chs count = 0; 3737 1.1 haad /* 3738 1.1 haad * Set each attribute requested. 3739 1.1 haad * We group settings according to the locks they need to acquire. 
3740 1.1 haad * 3741 1.1 haad * Note: you cannot set ctime directly, although it will be 3742 1.1 haad * updated as a side-effect of calling this function. 3743 1.1 haad */ 3744 1.1 haad 3745 1.27 chs if (mask & (AT_UID|AT_GID|AT_MODE)) 3746 1.27 chs mutex_enter(&zp->z_acl_lock); 3747 1.27 chs 3748 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3749 1.27 chs &zp->z_pflags, sizeof (zp->z_pflags)); 3750 1.27 chs 3751 1.27 chs if (attrzp) { 3752 1.27 chs if (mask & (AT_UID|AT_GID|AT_MODE)) 3753 1.27 chs mutex_enter(&attrzp->z_acl_lock); 3754 1.27 chs SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3755 1.27 chs SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3756 1.27 chs sizeof (attrzp->z_pflags)); 3757 1.27 chs } 3758 1.27 chs 3759 1.27 chs if (mask & (AT_UID|AT_GID)) { 3760 1.27 chs 3761 1.27 chs if (mask & AT_UID) { 3762 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3763 1.27 chs &new_uid, sizeof (new_uid)); 3764 1.27 chs zp->z_uid = new_uid; 3765 1.27 chs if (attrzp) { 3766 1.27 chs SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3767 1.27 chs SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3768 1.27 chs sizeof (new_uid)); 3769 1.27 chs attrzp->z_uid = new_uid; 3770 1.27 chs } 3771 1.27 chs } 3772 1.27 chs 3773 1.27 chs if (mask & AT_GID) { 3774 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3775 1.27 chs NULL, &new_gid, sizeof (new_gid)); 3776 1.27 chs zp->z_gid = new_gid; 3777 1.27 chs if (attrzp) { 3778 1.27 chs SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3779 1.27 chs SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3780 1.27 chs sizeof (new_gid)); 3781 1.27 chs attrzp->z_gid = new_gid; 3782 1.27 chs } 3783 1.27 chs } 3784 1.27 chs if (!(mask & AT_MODE)) { 3785 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3786 1.27 chs NULL, &new_mode, sizeof (new_mode)); 3787 1.27 chs new_mode = zp->z_mode; 3788 1.27 chs } 3789 1.27 chs err = zfs_acl_chown_setattr(zp); 3790 1.27 chs ASSERT(err == 0); 3791 1.27 chs if (attrzp) { 3792 1.27 chs err = 
zfs_acl_chown_setattr(attrzp); 3793 1.27 chs ASSERT(err == 0); 3794 1.27 chs } 3795 1.27 chs } 3796 1.1 haad 3797 1.1 haad if (mask & AT_MODE) { 3798 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3799 1.27 chs &new_mode, sizeof (new_mode)); 3800 1.27 chs zp->z_mode = new_mode; 3801 1.27 chs ASSERT3U((uintptr_t)aclp, !=, 0); 3802 1.4 haad err = zfs_aclset_common(zp, aclp, cr, tx); 3803 1.27 chs ASSERT0(err); 3804 1.27 chs if (zp->z_acl_cached) 3805 1.27 chs zfs_acl_free(zp->z_acl_cached); 3806 1.4 haad zp->z_acl_cached = aclp; 3807 1.4 haad aclp = NULL; 3808 1.1 haad } 3809 1.1 haad 3810 1.1 haad 3811 1.27 chs if (mask & AT_ATIME) { 3812 1.27 chs ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3813 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3814 1.27 chs &zp->z_atime, sizeof (zp->z_atime)); 3815 1.1 haad } 3816 1.1 haad 3817 1.27 chs if (mask & AT_MTIME) { 3818 1.27 chs ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3819 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3820 1.27 chs mtime, sizeof (mtime)); 3821 1.1 haad } 3822 1.1 haad 3823 1.1 haad /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 3824 1.27 chs if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3825 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3826 1.27 chs NULL, mtime, sizeof (mtime)); 3827 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3828 1.27 chs &ctime, sizeof (ctime)); 3829 1.27 chs zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3830 1.27 chs B_TRUE); 3831 1.27 chs } else if (mask != 0) { 3832 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3833 1.27 chs &ctime, sizeof (ctime)); 3834 1.27 chs zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3835 1.27 chs B_TRUE); 3836 1.27 chs if (attrzp) { 3837 1.27 chs SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3838 1.27 chs SA_ZPL_CTIME(zfsvfs), NULL, 3839 1.27 chs &ctime, sizeof (ctime)); 3840 1.27 chs zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3841 1.27 chs mtime, ctime, B_TRUE); 3842 1.27 chs } 3843 1.27 chs } 3844 1.1 haad /* 3845 1.1 haad * Do this after setting timestamps to prevent timestamp 3846 1.1 haad * update from toggling bit 3847 1.1 haad */ 3848 1.1 haad 3849 1.1 haad if (xoap && (mask & AT_XVATTR)) { 3850 1.4 haad 3851 1.27 chs if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3852 1.27 chs xoap->xoa_createtime = vap->va_birthtime; 3853 1.4 haad /* 3854 1.4 haad * restore trimmed off masks 3855 1.4 haad * so that return masks can be set for caller. 
3856 1.4 haad */ 3857 1.4 haad 3858 1.4 haad if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3859 1.4 haad XVA_SET_REQ(xvap, XAT_APPENDONLY); 3860 1.4 haad } 3861 1.4 haad if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3862 1.4 haad XVA_SET_REQ(xvap, XAT_NOUNLINK); 3863 1.4 haad } 3864 1.4 haad if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3865 1.4 haad XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3866 1.4 haad } 3867 1.4 haad if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3868 1.4 haad XVA_SET_REQ(xvap, XAT_NODUMP); 3869 1.4 haad } 3870 1.4 haad if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3871 1.4 haad XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3872 1.4 haad } 3873 1.4 haad if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3874 1.4 haad XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3875 1.4 haad } 3876 1.4 haad 3877 1.27 chs if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3878 1.1 haad ASSERT(vp->v_type == VREG); 3879 1.1 haad 3880 1.27 chs zfs_xvattr_set(zp, xvap, tx); 3881 1.1 haad } 3882 1.1 haad 3883 1.4 haad if (fuid_dirtied) 3884 1.4 haad zfs_fuid_sync(zfsvfs, tx); 3885 1.4 haad 3886 1.1 haad if (mask != 0) 3887 1.1 haad zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3888 1.1 haad 3889 1.27 chs if (mask & (AT_UID|AT_GID|AT_MODE)) 3890 1.27 chs mutex_exit(&zp->z_acl_lock); 3891 1.1 haad 3892 1.27 chs if (attrzp) { 3893 1.27 chs if (mask & (AT_UID|AT_GID|AT_MODE)) 3894 1.27 chs mutex_exit(&attrzp->z_acl_lock); 3895 1.27 chs } 3896 1.4 haad out: 3897 1.27 chs if (err == 0 && attrzp) { 3898 1.27 chs err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3899 1.27 chs xattr_count, tx); 3900 1.27 chs ASSERT(err2 == 0); 3901 1.27 chs } 3902 1.27 chs 3903 1.1 haad if (attrzp) 3904 1.27 chs vput(ZTOV(attrzp)); 3905 1.1 haad 3906 1.4 haad if (aclp) 3907 1.4 haad zfs_acl_free(aclp); 3908 1.4 haad 3909 1.4 haad if (fuidp) { 3910 1.4 haad zfs_fuid_info_free(fuidp); 3911 1.4 haad fuidp = NULL; 3912 1.4 haad } 3913 1.4 haad 3914 1.27 chs if (err) { 3915 1.4 haad dmu_tx_abort(tx); 
3916 1.27 chs } else { 3917 1.27 chs err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3918 1.4 haad dmu_tx_commit(tx); 3919 1.27 chs } 3920 1.4 haad 3921 1.27 chs out2: 3922 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3923 1.27 chs zil_commit(zilog, 0); 3924 1.1 haad 3925 1.1 haad ZFS_EXIT(zfsvfs); 3926 1.1 haad return (err); 3927 1.1 haad } 3928 1.1 haad 3929 1.1 haad /* 3930 1.27 chs * We acquire all but fdvp locks using non-blocking acquisitions. If we 3931 1.27 chs * fail to acquire any lock in the path we will drop all held locks, 3932 1.27 chs * acquire the new lock in a blocking fashion, and then release it and 3933 1.27 chs * restart the rename. This acquire/release step ensures that we do not 3934 1.27 chs * spin on a lock waiting for release. On error release all vnode locks 3935 1.27 chs * and decrement references the way tmpfs_rename() would do. 3936 1.1 haad */ /* IN: sdvp, tdvp - source and target directory vnodes; scnp, tcnp - component names used to look the entries up again. IN/OUT: svpp, tvpp - source and target vnodes; the old references are released with vrele and the pointers are replaced by the freshly resolved vnodes (*tvpp becomes NULL when the target name does not exist). RETURN: 0 with sdvp, *svpp, tdvp and any non-NULL *tvpp held LK_EXCLUSIVE (see the KASSERTs before the final return), otherwise an error code. */ 3937 1.27 chs static int 3938 1.27 chs zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3939 1.27 chs struct vnode *tdvp, struct vnode **tvpp, 3940 1.27 chs const struct componentname *scnp, const struct componentname *tcnp) 3941 1.1 haad { 3942 1.27 chs zfsvfs_t *zfsvfs; 3943 1.27 chs struct vnode *nvp, *svp, *tvp; 3944 1.27 chs znode_t *sdzp, *tdzp, *szp, *tzp; 3945 1.37 hannken #ifdef __FreeBSD__ 3946 1.27 chs const char *snm = scnp->cn_nameptr; 3947 1.27 chs const char *tnm = tcnp->cn_nameptr; 3948 1.37 hannken #endif 3949 1.37 hannken #ifdef __NetBSD__ 3950 1.37 hannken char *snm, *tnm; 3951 1.37 hannken #endif 3952 1.27 chs int error; 3953 1.27 chs 3954 1.27 chs #ifdef __FreeBSD__ 3955 1.27 chs VOP_UNLOCK(tdvp, 0); 3956 1.27 chs if (*tvpp != NULL && *tvpp != tdvp) 3957 1.27 chs VOP_UNLOCK(*tvpp, 0); 3958 1.27 chs #endif 3959 1.27 chs 3960 1.27 chs relock: /* Restart point: every failed non-blocking lock attempt below drops the locks held so far and jumps back here. */ 3961 1.27 chs error = vn_lock(sdvp, LK_EXCLUSIVE); 3962 1.27 chs if (error) 3963 1.27 chs goto out; 3964 1.27 chs sdzp = VTOZ(sdvp); 3965 1.27 chs 3966 1.27 chs #ifdef __NetBSD__ 3967 1.27 chs if (tdvp == sdvp) { /* same directory vnode: it was locked as sdvp just above, so do not lock it again */
3968 1.27 chs } else { 3969 1.27 chs #endif 3970 1.27 chs error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3971 1.27 chs if (error != 0) { 3972 1.27 chs VOP_UNLOCK(sdvp, 0); 3973 1.27 chs if (error != EBUSY) 3974 1.27 chs goto out; /* EBUSY: wait for tdvp with a blocking lock, release it again and retry from relock */ 3975 1.27 chs error = vn_lock(tdvp, LK_EXCLUSIVE); 3976 1.27 chs if (error) 3977 1.27 chs goto out; 3978 1.27 chs VOP_UNLOCK(tdvp, 0); 3979 1.27 chs goto relock; 3980 1.27 chs } 3981 1.27 chs #ifdef __NetBSD__ 3982 1.27 chs } /* end if (tdvp == sdvp) */ 3983 1.27 chs #endif 3984 1.27 chs 3985 1.27 chs tdzp = VTOZ(tdvp); 3986 1.27 chs 3987 1.27 chs /* 3988 1.27 chs * Before using sdzp and tdzp we must ensure that they are live. 3989 1.27 chs * As a porting legacy from illumos we have two things to worry 3990 1.27 chs * about. One is typical for FreeBSD and it is that the vnode is 3991 1.27 chs * not reclaimed (doomed). The other is that the znode is live. 3992 1.27 chs * The current code can invalidate the znode without acquiring the 3993 1.27 chs * corresponding vnode lock if the object represented by the znode 3994 1.27 chs * and vnode is no longer valid after a rollback or receive operation. 3995 1.27 chs * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3996 1.27 chs * that protects the znodes from the invalidation. 3997 1.27 chs */ 3998 1.27 chs zfsvfs = sdzp->z_zfsvfs; 3999 1.27 chs ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 4000 1.27 chs ZFS_ENTER(zfsvfs); 4001 1.27 chs 4002 1.27 chs /* 4003 1.27 chs * We can not use ZFS_VERIFY_ZP() here because it could directly return 4004 1.27 chs * bypassing the cleanup code in the case of an error.
4005 1.27 chs */ 4006 1.27 chs if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 4007 1.27 chs ZFS_EXIT(zfsvfs); 4008 1.27 chs VOP_UNLOCK(sdvp, 0); 4009 1.27 chs #ifdef __NetBSD__ 4010 1.27 chs if (tdvp != sdvp) 4011 1.27 chs #endif 4012 1.27 chs VOP_UNLOCK(tdvp, 0); 4013 1.27 chs error = SET_ERROR(EIO); 4014 1.27 chs goto out; 4015 1.27 chs } 4016 1.27 chs 4017 1.27 chs /* 4018 1.27 chs * Re-resolve svp to be certain it still exists and fetch the 4019 1.27 chs * correct vnode. 4020 1.27 chs */ 4021 1.37 hannken #ifdef __NetBSD__ 4022 1.37 hannken /* ZFS wants a null-terminated name. */ 4023 1.37 hannken snm = PNBUF_GET(); /* the size passed to strlcpy is cn_namelen + 1 so that all cn_namelen bytes plus the terminating NUL fit */ 4024 1.37 hannken strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 4025 1.37 hannken #endif 4026 1.27 chs error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 4027 1.37 hannken #ifdef __NetBSD__ 4028 1.37 hannken PNBUF_PUT(snm); 4029 1.37 hannken #endif 4030 1.27 chs if (error != 0) { 4031 1.27 chs /* Source entry invalid or not there. */ 4032 1.27 chs ZFS_EXIT(zfsvfs); 4033 1.27 chs VOP_UNLOCK(sdvp, 0); 4034 1.27 chs #ifdef __NetBSD__ 4035 1.27 chs if (tdvp != sdvp) 4036 1.27 chs #endif 4037 1.27 chs VOP_UNLOCK(tdvp, 0); /* a failed lookup of dot or dotdot is reported as EINVAL rather than as the lookup error */ 4038 1.27 chs if ((scnp->cn_flags & ISDOTDOT) != 0 || 4039 1.27 chs (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 4040 1.27 chs error = SET_ERROR(EINVAL); 4041 1.27 chs goto out; 4042 1.27 chs } 4043 1.27 chs svp = ZTOV(szp); 4044 1.1 haad 4045 1.27 chs /* 4046 1.27 chs * Re-resolve tvp, if it disappeared we just carry on. 4047 1.27 chs */ 4048 1.37 hannken #ifdef __NetBSD__ 4049 1.37 hannken /* ZFS wants a null-terminated name.
*/ 4050 1.37 hannken tnm = PNBUF_GET(); 4051 1.37 hannken strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4052 1.37 hannken #endif 4053 1.27 chs error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 4054 1.37 hannken #ifdef __NetBSD__ 4055 1.37 hannken PNBUF_PUT(tnm); 4056 1.37 hannken #endif 4057 1.27 chs if (error != 0) { 4058 1.27 chs ZFS_EXIT(zfsvfs); 4059 1.27 chs VOP_UNLOCK(sdvp, 0); 4060 1.27 chs #ifdef __NetBSD__ 4061 1.27 chs if (tdvp != sdvp) 4062 1.27 chs #endif 4063 1.27 chs VOP_UNLOCK(tdvp, 0); 4064 1.27 chs vrele(svp); 4065 1.27 chs if ((tcnp->cn_flags & ISDOTDOT) != 0) 4066 1.27 chs error = SET_ERROR(EINVAL); 4067 1.27 chs goto out; 4068 1.1 haad } /* the lookup can succeed without finding an entry, in which case tzp is NULL and there is no target vnode */ 4069 1.27 chs if (tzp != NULL) 4070 1.27 chs tvp = ZTOV(tzp); 4071 1.27 chs else 4072 1.27 chs tvp = NULL; 4073 1.1 haad 4074 1.27 chs /* 4075 1.27 chs * At present the vnode locks must be acquired before z_teardown_lock, 4076 1.27 chs * although it would be more logical to use the opposite order. 4077 1.27 chs */ 4078 1.27 chs ZFS_EXIT(zfsvfs); 4079 1.1 haad 4080 1.1 haad /* 4081 1.27 chs * Now try to acquire locks on svp and tvp. 4082 1.1 haad */ 4083 1.27 chs nvp = svp; 4084 1.27 chs error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4085 1.27 chs if (error != 0) { 4086 1.27 chs VOP_UNLOCK(sdvp, 0); 4087 1.27 chs #ifdef __NetBSD__ 4088 1.27 chs if (tdvp != sdvp) 4089 1.27 chs #endif 4090 1.27 chs VOP_UNLOCK(tdvp, 0); 4091 1.27 chs if (tvp != NULL) 4092 1.27 chs vrele(tvp); 4093 1.27 chs if (error != EBUSY) { 4094 1.27 chs vrele(nvp); 4095 1.27 chs goto out; 4096 1.27 chs } 4097 1.27 chs error = vn_lock(nvp, LK_EXCLUSIVE); 4098 1.27 chs if (error != 0) { 4099 1.27 chs vrele(nvp); 4100 1.27 chs goto out; 4101 1.27 chs } 4102 1.27 chs VOP_UNLOCK(nvp, 0); 4103 1.27 chs /* 4104 1.27 chs * Concurrent rename race. 4105 1.27 chs * XXX ?
4106 1.27 chs */ 4107 1.27 chs if (nvp == tdvp) { 4108 1.27 chs vrele(nvp); 4109 1.27 chs error = SET_ERROR(EINVAL); 4110 1.27 chs goto out; 4111 1.27 chs } /* hand the re-resolved source vnode back through svpp, releasing the old reference, then retry from relock */ 4112 1.27 chs #ifdef __NetBSD__ 4113 1.27 chs if (*svpp != NULL) 4114 1.27 chs #endif 4115 1.27 chs vrele(*svpp); 4116 1.27 chs *svpp = nvp; 4117 1.27 chs goto relock; 4118 1.27 chs } 4119 1.27 chs #ifdef __NetBSD__ 4120 1.27 chs if (*svpp != NULL) 4121 1.27 chs #endif 4122 1.27 chs vrele(*svpp); 4123 1.27 chs *svpp = nvp; 4124 1.27 chs 4125 1.27 chs if (*tvpp != NULL) 4126 1.27 chs vrele(*tvpp); 4127 1.27 chs *tvpp = NULL; 4128 1.27 chs if (tvp != NULL) { 4129 1.27 chs nvp = tvp; 4130 1.27 chs 4131 1.27 chs #ifdef __NetBSD__ 4132 1.27 chs if (tvp == svp || tvp == sdvp) { /* tvp is the same vnode as svp or sdvp, both locked above, so the lock attempt is skipped */ 4133 1.27 chs } else { 4134 1.27 chs #endif 4135 1.27 chs error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4136 1.27 chs if (error != 0) { 4137 1.27 chs VOP_UNLOCK(sdvp, 0); 4138 1.27 chs #ifdef __NetBSD__ 4139 1.27 chs if (tdvp != sdvp) 4140 1.27 chs #endif 4141 1.27 chs VOP_UNLOCK(tdvp, 0); 4142 1.27 chs #ifdef __NetBSD__ 4143 1.27 chs if (*svpp != tdvp) 4144 1.27 chs #endif 4145 1.27 chs VOP_UNLOCK(*svpp, 0); 4146 1.27 chs if (error != EBUSY) { 4147 1.27 chs vrele(nvp); 4148 1.27 chs goto out; 4149 1.27 chs } 4150 1.27 chs error = vn_lock(nvp, LK_EXCLUSIVE); 4151 1.27 chs if (error != 0) { 4152 1.27 chs vrele(nvp); 4153 1.27 chs goto out; 4154 1.1 haad } /* tvp was obtained after waiting: vput unlocks it and drops the reference, then everything is retried */ 4155 1.27 chs vput(nvp); 4156 1.27 chs goto relock; 4157 1.1 haad } 4158 1.27 chs #ifdef __NetBSD__ 4159 1.27 chs } /* end if (tvp == svp || tvp == sdvp) */ 4160 1.27 chs #endif 4161 1.27 chs 4162 1.27 chs *tvpp = nvp; 4163 1.27 chs } 4164 1.27 chs /* postcondition: every vnode handed back to the caller is exclusively locked */ 4165 1.27 chs KASSERT(VOP_ISLOCKED(sdvp) == LK_EXCLUSIVE); 4166 1.27 chs KASSERT(VOP_ISLOCKED(*svpp) == LK_EXCLUSIVE); 4167 1.27 chs KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4168 1.27 chs KASSERT(*tvpp == NULL || VOP_ISLOCKED(*tvpp) == LK_EXCLUSIVE); 4169 1.27 chs 4170 1.27 chs return (0); 4171 1.27 chs 4172 1.27 chs out: 4173 1.27 chs return
(error); 4174 1.27 chs } 4175 1.1 haad 4176 1.27 chs /* 4177 1.27 chs * Note that we must use VN_RELE_ASYNC in this function as it walks 4178 1.27 chs * up the directory tree and vrele may need to acquire an exclusive 4179 1.27 chs * lock if a last reference to a vnode is dropped. 4180 1.27 chs */ /* Walks the parent chain upward from tdzp. RETURN: EINVAL when szp is tdzp itself or one of its ancestors; 0 when tdzp is sdzp or the root, or when the walk reaches the root or sdzp first; otherwise the error reported by sa_lookup or zfs_zget. */ 4181 1.27 chs static int 4182 1.27 chs zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 4183 1.27 chs { 4184 1.27 chs zfsvfs_t *zfsvfs; 4185 1.27 chs znode_t *zp, *zp1; 4186 1.27 chs uint64_t parent; 4187 1.27 chs int error; 4188 1.1 haad 4189 1.27 chs zfsvfs = tdzp->z_zfsvfs; 4190 1.27 chs if (tdzp == szp) 4191 1.27 chs return (SET_ERROR(EINVAL)); /* source and target directory are the same znode: nothing to walk */ 4192 1.27 chs if (tdzp == sdzp) 4193 1.27 chs return (0); 4194 1.27 chs if (tdzp->z_id == zfsvfs->z_root) 4195 1.27 chs return (0); 4196 1.27 chs zp = tdzp; 4197 1.27 chs for (;;) { 4198 1.27 chs ASSERT(!zp->z_unlinked); /* read the object id of the parent of zp; on success error is left at 0 for the break statements below */ 4199 1.27 chs if ((error = sa_lookup(zp->z_sa_hdl, 4200 1.27 chs SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 4201 1.27 chs break; 4202 1.1 haad 4203 1.27 chs if (parent == szp->z_id) { 4204 1.27 chs error = SET_ERROR(EINVAL); 4205 1.27 chs break; 4206 1.27 chs } 4207 1.27 chs if (parent == zfsvfs->z_root) 4208 1.27 chs break; 4209 1.27 chs if (parent == sdzp->z_id) 4210 1.27 chs break; 4211 1.1 haad 4212 1.27 chs error = zfs_zget(zfsvfs, parent, &zp1); 4213 1.27 chs if (error != 0) 4214 1.27 chs break; 4215 1.1 haad /* release the intermediate znode that came from an earlier zfs_zget; tdzp itself is never released here */ 4216 1.27 chs if (zp != tdzp) 4217 1.27 chs VN_RELE_ASYNC(ZTOV(zp), 4218 1.27 chs dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4219 1.27 chs zp = zp1; 4220 1.27 chs } 4221 1.1 haad 4222 1.27 chs if (error == ENOTDIR) 4223 1.27 chs panic("checkpath: ..
not a directory\n"); 4224 1.27 chs if (zp != tdzp) 4225 1.27 chs VN_RELE_ASYNC(ZTOV(zp), 4226 1.27 chs dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4227 1.27 chs return (error); 4228 1.1 haad } 4229 1.1 haad 4230 1.1 haad /* 4231 1.1 haad * Move an entry from the provided source directory to the target 4232 1.1 haad * directory. Change the entry name as indicated. 4233 1.1 haad * 4234 1.1 haad * IN: sdvp - Source directory containing the "old entry". 4235 1.1 haad * snm - Old entry name. 4236 1.1 haad * tdvp - Target directory to contain the "new entry". 4237 1.1 haad * tnm - New entry name. 4238 1.1 haad * cr - credentials of caller. 4239 1.1 haad * ct - caller context 4240 1.1 haad * flags - case flags 4241 1.1 haad * 4242 1.27 chs * RETURN: 0 on success, error code on failure. 4243 1.1 haad * 4244 1.1 haad * Timestamps: 4245 1.1 haad * sdvp,tdvp - ctime|mtime updated 4246 1.1 haad */ 4247 1.1 haad /*ARGSUSED*/ 4248 1.1 haad static int 4249 1.27 chs zfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 4250 1.27 chs vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 4251 1.27 chs cred_t *cr) 4252 1.1 haad { 4253 1.27 chs zfsvfs_t *zfsvfs; 4254 1.27 chs znode_t *sdzp, *tdzp, *szp, *tzp; 4255 1.27 chs zilog_t *zilog = NULL; 4256 1.1 haad dmu_tx_t *tx; 4257 1.37 hannken #ifdef __FreeBSD__ 4258 1.27 chs char *snm = __UNCONST(scnp->cn_nameptr); 4259 1.27 chs char *tnm = __UNCONST(tcnp->cn_nameptr); 4260 1.37 hannken #endif 4261 1.37 hannken #ifdef __NetBSD__ 4262 1.37 hannken char *snm, *tnm; 4263 1.37 hannken #endif 4264 1.1 haad int error = 0; 4265 1.1 haad 4266 1.27 chs /* Reject renames across filesystems.
*/ /* NOTE(review): the header comment of this function still lists snm, tnm, ct and flags, none of which is a parameter of this signature; the names arrive through scnp and tcnp, and svpp and tvpp are in/out vnode pointers. */ /* NOTE(review): the second clause below dereferences *svpp without the NULL test that the first clause applies - confirm that callers never pass a NULL *svpp together with a non-NULL *tvpp. */ 4267 1.27 chs if (((*svpp) != NULL && (*svpp)->v_mount != tdvp->v_mount) || 4268 1.27 chs ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 4269 1.27 chs error = SET_ERROR(EXDEV); 4270 1.27 chs goto out; 4271 1.27 chs } 4272 1.1 haad 4273 1.27 chs if (zfsctl_is_node(tdvp)) { 4274 1.27 chs error = SET_ERROR(EXDEV); 4275 1.27 chs goto out; 4276 1.1 haad } 4277 1.1 haad 4278 1.27 chs /* 4279 1.27 chs * Lock all four vnodes to ensure safety and semantics of renaming. 4280 1.27 chs */ 4281 1.27 chs error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 4282 1.27 chs if (error != 0) { 4283 1.27 chs /* no vnodes are locked in the case of error here */ 4284 1.27 chs return (error); 4285 1.1 haad } 4286 1.1 haad 4287 1.27 chs tdzp = VTOZ(tdvp); 4288 1.27 chs sdzp = VTOZ(sdvp); 4289 1.27 chs zfsvfs = tdzp->z_zfsvfs; 4290 1.27 chs zilog = zfsvfs->z_log; 4291 1.37 hannken #ifdef __NetBSD__ 4292 1.37 hannken /* ZFS wants a null-terminated name. */ /* Both buffers are returned with PNBUF_PUT at unlockout; every goto unlockout in this function occurs after this point. */ 4293 1.37 hannken snm = PNBUF_GET(); 4294 1.37 hannken strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 4295 1.37 hannken tnm = PNBUF_GET(); 4296 1.37 hannken strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4297 1.37 hannken #endif 4298 1.1 haad 4299 1.1 haad /* 4300 1.27 chs * After we re-enter ZFS_ENTER() we will have to revalidate all 4301 1.27 chs * znodes involved. 4302 1.1 haad */ 4303 1.27 chs ZFS_ENTER(zfsvfs); 4304 1.27 chs 4305 1.27 chs if (zfsvfs->z_utf8 && u8_validate(tnm, 4306 1.27 chs strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4307 1.27 chs error = SET_ERROR(EILSEQ); 4308 1.27 chs goto unlockout; 4309 1.1 haad } 4310 1.1 haad 4311 1.30 hannken #ifndef __NetBSD__ 4312 1.27 chs /* If source and target are the same file, there is nothing to do.
*/ 4313 1.27 chs if ((*svpp) == (*tvpp)) { 4314 1.27 chs error = 0; 4315 1.27 chs goto unlockout; 4316 1.27 chs } 4317 1.30 hannken #endif 4318 1.1 haad /* Refuse to move a directory that has a filesystem mounted on it, or to replace such a directory. */ 4319 1.27 chs if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 4320 1.27 chs ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 4321 1.27 chs (*tvpp)->v_mountedhere != NULL)) { 4322 1.27 chs error = SET_ERROR(EXDEV); 4323 1.27 chs goto unlockout; 4324 1.1 haad } 4325 1.1 haad 4326 1.4 haad /* 4327 1.27 chs * We can not use ZFS_VERIFY_ZP() here because it could directly return 4328 1.27 chs * bypassing the cleanup code in the case of an error. 4329 1.4 haad */ 4330 1.27 chs if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 4331 1.27 chs error = SET_ERROR(EIO); 4332 1.27 chs goto unlockout; 4333 1.4 haad } 4334 1.4 haad 4335 1.27 chs szp = VTOZ(*svpp); 4336 1.27 chs tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 4337 1.27 chs if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 4338 1.27 chs error = SET_ERROR(EIO); 4339 1.27 chs goto unlockout; 4340 1.1 haad } 4341 1.1 haad 4342 1.27 chs /* 4343 1.27 chs * This is to prevent the creation of links into attribute space 4344 1.27 chs * by renaming a linked file into/out of an attribute directory. 4345 1.27 chs * See the comment in zfs_link() for why this is considered bad. 4346 1.27 chs */ 4347 1.27 chs if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 4348 1.27 chs error = SET_ERROR(EINVAL); 4349 1.27 chs goto unlockout; 4350 1.1 haad } 4351 1.1 haad 4352 1.1 haad /* 4353 1.1 haad * Must have write access at the source to remove the old entry 4354 1.1 haad * and write access at the target to create the new entry. 4355 1.1 haad * Note that if target and source are the same, this can be 4356 1.1 haad * done in a single check.
4357 1.1 haad */ /* the assignment inside the condition is intentional: a nonzero result is stored in error and tested at once */ 4358 1.27 chs if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 4359 1.27 chs goto unlockout; 4360 1.1 haad 4361 1.27 chs if ((*svpp)->v_type == VDIR) { 4362 1.27 chs /* 4363 1.27 chs * Avoid ".", "..", and aliases of "." for obvious reasons. 4364 1.27 chs */ 4365 1.27 chs if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 4366 1.27 chs sdzp == szp || 4367 1.27 chs (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 4368 1.27 chs error = SET_ERROR(EINVAL); 4369 1.27 chs goto unlockout; 4370 1.27 chs } 4371 1.1 haad 4372 1.1 haad /* 4373 1.1 haad * Check to make sure rename is valid. 4374 1.1 haad * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 4375 1.1 haad */ 4376 1.27 chs if (error = zfs_rename_check(szp, sdzp, tdzp)) 4377 1.27 chs goto unlockout; 4378 1.1 haad } 4379 1.1 haad 4380 1.1 haad /* 4381 1.1 haad * Does target exist? 4382 1.1 haad */ 4383 1.1 haad if (tzp) { 4384 1.1 haad /* 4385 1.1 haad * Source and target must be the same type. 4386 1.1 haad */ 4387 1.27 chs if ((*svpp)->v_type == VDIR) { 4388 1.27 chs if ((*tvpp)->v_type != VDIR) { 4389 1.27 chs error = SET_ERROR(ENOTDIR); 4390 1.27 chs goto unlockout; 4391 1.27 chs } else { 4392 1.27 chs cache_purge(tdvp); 4393 1.27 chs if (sdvp != tdvp) 4394 1.27 chs cache_purge(sdvp); 4395 1.1 haad } 4396 1.1 haad } else { 4397 1.27 chs if ((*tvpp)->v_type == VDIR) { 4398 1.27 chs error = SET_ERROR(EISDIR); 4399 1.27 chs goto unlockout; 4400 1.1 haad } 4401 1.1 haad } 4402 1.27 chs 4403 1.1 haad /* 4404 1.1 haad * POSIX dictates that when the source and target 4405 1.1 haad * entries refer to the same file object, rename 4406 1.1 haad * must do nothing and exit without error.
4407 1.1 haad */ 4408 1.12 riastrad #ifndef __NetBSD__ 4409 1.12 riastrad /* 4410 1.12 riastrad * But on NetBSD we have a different system call to do 4411 1.12 riastrad * this, posix_rename, which sorta kinda handles this 4412 1.12 riastrad * case (modulo races), and our tests expect BSD 4413 1.12 riastrad * semantics for rename, so we'll do that until we can 4414 1.12 riastrad * push the choice between BSD and POSIX semantics into 4415 1.12 riastrad * the VOP_RENAME protocol as a flag. 4416 1.12 riastrad */ 4417 1.1 haad if (szp->z_id == tzp->z_id) { 4418 1.1 haad error = 0; 4419 1.27 chs goto unlockout; 4420 1.1 haad } 4421 1.12 riastrad #endif 4422 1.1 haad } 4423 1.1 haad /* NOTE(review): ct is neither a parameter nor a local of this function; presumably the vnevent_* hooks are macros that discard their arguments on this platform - confirm. */ 4424 1.27 chs vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 4425 1.1 haad if (tzp) 4426 1.27 chs vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 4427 1.1 haad 4428 1.1 haad /* 4429 1.1 haad * notify the target directory if it is not the same 4430 1.1 haad * as source directory. 4431 1.1 haad */ 4432 1.1 haad if (tdvp != sdvp) { 4433 1.1 haad vnevent_rename_dest_dir(tdvp, ct); 4434 1.1 haad } 4435 1.1 haad /* Register every hold the transaction needs before it is assigned: the source znode, both directories and their entries, the existing target if any, and the unlinked set. */ 4436 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 4437 1.27 chs dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4438 1.27 chs dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 4439 1.1 haad dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 4440 1.1 haad dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 4441 1.27 chs if (sdzp != tdzp) { 4442 1.27 chs dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 4443 1.27 chs zfs_sa_upgrade_txholds(tx, tdzp); 4444 1.27 chs } 4445 1.27 chs if (tzp) { 4446 1.27 chs dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 4447 1.27 chs zfs_sa_upgrade_txholds(tx, tzp); 4448 1.27 chs } 4449 1.27 chs 4450 1.27 chs zfs_sa_upgrade_txholds(tx, szp); 4451 1.1 haad dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 4452 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 4453 1.1 haad if (error) { 4454 1.1 haad dmu_tx_abort(tx); 4455 1.27 chs goto unlockout; 4456 1.1 haad } 4457 1.1 haad 4458 1.27 chs
/* When source and target are the same object, the target link is neither destroyed nor created; only the source name is removed further down. */ 4459 1.12 riastrad if (tzp && (tzp->z_id != szp->z_id)) 4460 1.12 riastrad /* Attempt to remove the existing target */ 4461 1.27 chs error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 4462 1.1 haad 4463 1.1 haad if (error == 0) { 4464 1.12 riastrad if (!tzp || (tzp->z_id != szp->z_id)) 4465 1.27 chs error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 4466 1.1 haad if (error == 0) { 4467 1.27 chs szp->z_pflags |= ZFS_AV_MODIFIED; 4468 1.27 chs 4469 1.27 chs error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4470 1.27 chs (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4471 1.27 chs ASSERT0(error); 4472 1.1 haad 4473 1.27 chs error = zfs_link_destroy(sdzp, snm, szp, tx, 4474 1.12 riastrad /* Kludge for BSD rename semantics. */ 4475 1.27 chs tzp && tzp->z_id == szp->z_id ? 0: ZRENAMING, NULL); 4476 1.27 chs if (error == 0) { 4477 1.27 chs zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 4478 1.27 chs snm, tdzp, tnm, szp); 4479 1.1 haad 4480 1.27 chs /* 4481 1.27 chs * Update path information for the target vnode 4482 1.27 chs */ 4483 1.27 chs vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 4484 1.27 chs } else { /* NOTE(review): this undo path also runs when source and target are the same object, where zfs_link_create was skipped above; in that case it destroys a target link this call did not create - confirm that is intended. */ 4485 1.27 chs /* 4486 1.27 chs * At this point, we have successfully created 4487 1.27 chs * the target name, but have failed to remove 4488 1.27 chs * the source name. Since the create was done 4489 1.27 chs * with the ZRENAMING flag, there are 4490 1.27 chs * complications; for one, the link count is 4491 1.27 chs * wrong. The easiest way to deal with this 4492 1.27 chs * is to remove the newly created target, and 4493 1.27 chs * return the original error. This must 4494 1.27 chs * succeed; fortunately, it is very unlikely to 4495 1.27 chs * fail, since we just created it.
4496 1.27 chs */ 4497 1.27 chs VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 4498 1.27 chs ZRENAMING, NULL), ==, 0); 4499 1.27 chs } 4500 1.1 haad } 4501 1.2 haad if (error == 0) { 4502 1.27 chs cache_purge(*svpp); 4503 1.27 chs if (*tvpp != NULL) 4504 1.27 chs cache_purge(*tvpp); 4505 1.27 chs cache_purge_negative(tdvp); 4506 1.52 hannken #ifdef __NetBSD__ 4507 1.52 hannken if (*svpp == *tvpp) { 4508 1.52 hannken VN_KNOTE(sdvp, NOTE_WRITE); 4509 1.52 hannken VN_KNOTE(*svpp, (szp->z_links == 0 ? 4510 1.52 hannken NOTE_DELETE : NOTE_LINK)); 4511 1.52 hannken } else { 4512 1.52 hannken genfs_rename_knote(sdvp, *svpp, tdvp, *tvpp, 4513 1.76 thorpej tzp != NULL ? tzp->z_links : 0); 4514 1.52 hannken } 4515 1.52 hannken #endif 4516 1.2 haad } 4517 1.1 haad } 4518 1.1 haad /* the transaction is committed even when one of the link operations above failed; error still carries that failure to the caller */ 4519 1.1 haad dmu_tx_commit(tx); 4520 1.1 haad 4521 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4522 1.27 chs zil_commit(zilog, 0); 4523 1.4 haad 4524 1.27 chs unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 4525 1.27 chs ZFS_EXIT(zfsvfs); 4526 1.1 haad 4527 1.27 chs VOP_UNLOCK(*svpp, 0); 4528 1.27 chs VOP_UNLOCK(sdvp, 0); 4529 1.37 hannken #ifdef __NetBSD__ 4530 1.37 hannken PNBUF_PUT(snm); 4531 1.37 hannken PNBUF_PUT(tnm); 4532 1.37 hannken #endif 4533 1.1 haad /* tvp and tdvp are unlocked only when they are distinct from the vnodes already unlocked above and from each other, so that an aliased vnode is not unlocked twice */ 4534 1.27 chs if (*tvpp != sdvp && *tvpp != *svpp) 4535 1.27 chs if (*tvpp != NULL) 4536 1.27 chs VOP_UNLOCK(*tvpp, 0); 4537 1.27 chs if (tdvp != sdvp && tdvp != *svpp) 4538 1.27 chs if (tdvp != *tvpp) 4539 1.27 chs VOP_UNLOCK(tdvp, 0); 4540 1.2 haad /* the early EXDEV exits jump straight here, before zfs_rename_relock has been called */ 4541 1.27 chs out: 4542 1.1 haad return (error); 4543 1.1 haad } 4544 1.1 haad 4545 1.1 haad /* 4546 1.1 haad * Insert the indicated symbolic reference entry into the directory. 4547 1.1 haad * 4548 1.1 haad * IN: dvp - Directory to contain new symbolic link. 4549 1.1 haad * link - Name for new symlink entry. 4550 1.1 haad * vap - Attributes of new entry. 4551 1.1 haad * cr - credentials of caller.
4552 1.1 haad * ct - caller context 4553 1.1 haad * flags - case flags 4554 1.1 haad * 4555 1.27 chs * RETURN: 0 on success, error code on failure. 4556 1.1 haad * 4557 1.1 haad * Timestamps: 4558 1.1 haad * dvp - ctime|mtime updated 4559 1.1 haad */ 4560 1.1 haad /*ARGSUSED*/ 4561 1.1 haad static int 4562 1.2 haad zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4563 1.27 chs cred_t *cr, kthread_t *td) 4564 1.1 haad { 4565 1.1 haad znode_t *zp, *dzp = VTOZ(dvp); 4566 1.1 haad dmu_tx_t *tx; 4567 1.1 haad zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4568 1.1 haad zilog_t *zilog; 4569 1.27 chs uint64_t len = strlen(link); 4570 1.1 haad int error; 4571 1.4 haad zfs_acl_ids_t acl_ids; 4572 1.4 haad boolean_t fuid_dirtied; 4573 1.27 chs uint64_t txtype = TX_SYMLINK; 4574 1.27 chs int flags = 0; 4575 1.1 haad 4576 1.1 haad ASSERT(vap->va_type == VLNK); 4577 1.1 haad 4578 1.1 haad ZFS_ENTER(zfsvfs); 4579 1.1 haad ZFS_VERIFY_ZP(dzp); 4580 1.1 haad zilog = zfsvfs->z_log; 4581 1.1 haad 4582 1.1 haad if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4583 1.1 haad NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4584 1.1 haad ZFS_EXIT(zfsvfs); 4585 1.27 chs return (SET_ERROR(EILSEQ)); 4586 1.1 haad } 4587 1.27 chs 4588 1.27 chs if (len > MAXPATHLEN) { 4589 1.1 haad ZFS_EXIT(zfsvfs); 4590 1.27 chs return (SET_ERROR(ENAMETOOLONG)); 4591 1.1 haad } 4592 1.1 haad 4593 1.27 chs if ((error = zfs_acl_ids_create(dzp, 0, 4594 1.27 chs vap, cr, NULL, &acl_ids)) != 0) { 4595 1.1 haad ZFS_EXIT(zfsvfs); 4596 1.27 chs return (error); 4597 1.1 haad } 4598 1.1 haad 4599 1.1 haad /* 4600 1.1 haad * Attempt to lock directory; fail if entry already exists. 
4601 1.1 haad */ 4602 1.27 chs error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4603 1.1 haad if (error) { 4604 1.27 chs zfs_acl_ids_free(&acl_ids); 4605 1.27 chs ZFS_EXIT(zfsvfs); 4606 1.27 chs return (error); 4607 1.27 chs } 4608 1.27 chs 4609 1.27 chs if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4610 1.27 chs zfs_acl_ids_free(&acl_ids); 4611 1.1 haad ZFS_EXIT(zfsvfs); 4612 1.1 haad return (error); 4613 1.1 haad } 4614 1.1 haad 4615 1.4 haad if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4616 1.4 haad zfs_acl_ids_free(&acl_ids); 4617 1.4 haad ZFS_EXIT(zfsvfs); 4618 1.27 chs return (SET_ERROR(EDQUOT)); 4619 1.4 haad } 4620 1.27 chs 4621 1.27 chs getnewvnode_reserve(1); 4622 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 4623 1.4 haad fuid_dirtied = zfsvfs->z_fuid_dirty; 4624 1.1 haad dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4625 1.1 haad dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4626 1.27 chs dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4627 1.27 chs ZFS_SA_BASE_ATTR_SIZE + len); 4628 1.27 chs dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4629 1.27 chs if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4630 1.27 chs dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4631 1.27 chs acl_ids.z_aclp->z_acl_bytes); 4632 1.27 chs } 4633 1.4 haad if (fuid_dirtied) 4634 1.4 haad zfs_fuid_txhold(zfsvfs, tx); 4635 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 4636 1.1 haad if (error) { 4637 1.4 haad zfs_acl_ids_free(&acl_ids); 4638 1.1 haad dmu_tx_abort(tx); 4639 1.27 chs getnewvnode_drop_reserve(); 4640 1.1 haad ZFS_EXIT(zfsvfs); 4641 1.1 haad return (error); 4642 1.1 haad } 4643 1.1 haad 4644 1.1 haad /* 4645 1.1 haad * Create a new object for the symlink. 
4646 1.27 chs * for version 4 ZPL datsets the symlink will be an SA attribute 4647 1.1 haad */ 4648 1.27 chs zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4649 1.1 haad 4650 1.27 chs if (fuid_dirtied) 4651 1.27 chs zfs_fuid_sync(zfsvfs, tx); 4652 1.4 haad 4653 1.27 chs if (zp->z_is_sa) 4654 1.27 chs error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4655 1.27 chs link, len, tx); 4656 1.27 chs else 4657 1.27 chs zfs_sa_symlink(zp, link, len, tx); 4658 1.1 haad 4659 1.27 chs zp->z_size = len; 4660 1.27 chs (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4661 1.27 chs &zp->z_size, sizeof (zp->z_size), tx); 4662 1.1 haad /* 4663 1.1 haad * Insert the new object into the directory. 4664 1.1 haad */ 4665 1.27 chs (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4666 1.27 chs 4667 1.27 chs zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4668 1.27 chs *vpp = ZTOV(zp); 4669 1.4 haad 4670 1.4 haad zfs_acl_ids_free(&acl_ids); 4671 1.1 haad 4672 1.1 haad dmu_tx_commit(tx); 4673 1.1 haad 4674 1.27 chs getnewvnode_drop_reserve(); 4675 1.27 chs 4676 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4677 1.27 chs zil_commit(zilog, 0); 4678 1.1 haad 4679 1.1 haad ZFS_EXIT(zfsvfs); 4680 1.1 haad return (error); 4681 1.1 haad } 4682 1.1 haad 4683 1.1 haad /* 4684 1.1 haad * Return, in the buffer contained in the provided uio structure, 4685 1.1 haad * the symbolic path referred to by vp. 4686 1.1 haad * 4687 1.1 haad * IN: vp - vnode of symbolic link. 4688 1.27 chs * uio - structure to contain the link path. 4689 1.1 haad * cr - credentials of caller. 4690 1.1 haad * ct - caller context 4691 1.1 haad * 4692 1.27 chs * OUT: uio - structure containing the link path. 4693 1.1 haad * 4694 1.27 chs * RETURN: 0 on success, error code on failure. 
4695 1.1 haad * 4696 1.1 haad * Timestamps: 4697 1.1 haad * vp - atime updated 4698 1.1 haad */ 4699 1.1 haad /* ARGSUSED */ 4700 1.1 haad static int 4701 1.1 haad zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4702 1.1 haad { 4703 1.1 haad znode_t *zp = VTOZ(vp); 4704 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4705 1.1 haad int error; 4706 1.1 haad 4707 1.1 haad ZFS_ENTER(zfsvfs); 4708 1.1 haad ZFS_VERIFY_ZP(zp); 4709 1.1 haad 4710 1.27 chs if (zp->z_is_sa) 4711 1.27 chs error = sa_lookup_uio(zp->z_sa_hdl, 4712 1.27 chs SA_ZPL_SYMLINK(zfsvfs), uio); 4713 1.27 chs else 4714 1.27 chs error = zfs_sa_readlink(zp, uio); 4715 1.1 haad 4716 1.1 haad ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4717 1.27 chs 4718 1.1 haad ZFS_EXIT(zfsvfs); 4719 1.1 haad return (error); 4720 1.1 haad } 4721 1.1 haad 4722 1.1 haad /* 4723 1.1 haad * Insert a new entry into directory tdvp referencing svp. 4724 1.1 haad * 4725 1.1 haad * IN: tdvp - Directory to contain new entry. 4726 1.1 haad * svp - vnode of new entry. 4727 1.1 haad * name - name of new entry. 4728 1.1 haad * cr - credentials of caller. 4729 1.1 haad * ct - caller context 4730 1.1 haad * 4731 1.27 chs * RETURN: 0 on success, error code on failure. 
4732 1.1 haad * 4733 1.1 haad * Timestamps: 4734 1.1 haad * tdvp - ctime|mtime updated 4735 1.1 haad * svp - ctime updated 4736 1.1 haad */ 4737 1.1 haad /* ARGSUSED */ 4738 1.1 haad static int 4739 1.1 haad zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4740 1.1 haad caller_context_t *ct, int flags) 4741 1.1 haad { 4742 1.1 haad znode_t *dzp = VTOZ(tdvp); 4743 1.1 haad znode_t *tzp, *szp; 4744 1.1 haad zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4745 1.1 haad zilog_t *zilog; 4746 1.1 haad dmu_tx_t *tx; 4747 1.1 haad int error; 4748 1.27 chs uint64_t parent; 4749 1.1 haad uid_t owner; 4750 1.1 haad 4751 1.1 haad ASSERT(tdvp->v_type == VDIR); 4752 1.1 haad 4753 1.1 haad ZFS_ENTER(zfsvfs); 4754 1.1 haad ZFS_VERIFY_ZP(dzp); 4755 1.1 haad zilog = zfsvfs->z_log; 4756 1.1 haad 4757 1.27 chs /* 4758 1.27 chs * POSIX dictates that we return EPERM here. 4759 1.27 chs * Better choices include ENOTSUP or EISDIR. 4760 1.27 chs */ 4761 1.27 chs if (svp->v_type == VDIR) { 4762 1.1 haad ZFS_EXIT(zfsvfs); 4763 1.27 chs return (SET_ERROR(EPERM)); 4764 1.1 haad } 4765 1.27 chs 4766 1.1 haad szp = VTOZ(svp); 4767 1.1 haad ZFS_VERIFY_ZP(szp); 4768 1.1 haad 4769 1.27 chs if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4770 1.27 chs ZFS_EXIT(zfsvfs); 4771 1.27 chs return (SET_ERROR(EPERM)); 4772 1.27 chs } 4773 1.27 chs 4774 1.27 chs /* Prevent links to .zfs/shares files */ 4775 1.27 chs 4776 1.27 chs if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4777 1.27 chs &parent, sizeof (uint64_t))) != 0) { 4778 1.27 chs ZFS_EXIT(zfsvfs); 4779 1.27 chs return (error); 4780 1.27 chs } 4781 1.27 chs if (parent == zfsvfs->z_shares_dir) { 4782 1.27 chs ZFS_EXIT(zfsvfs); 4783 1.27 chs return (SET_ERROR(EPERM)); 4784 1.27 chs } 4785 1.27 chs 4786 1.1 haad if (zfsvfs->z_utf8 && u8_validate(name, 4787 1.1 haad strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4788 1.1 haad ZFS_EXIT(zfsvfs); 4789 1.27 chs return (SET_ERROR(EILSEQ)); 4790 1.1 haad } 4791 1.1 
haad 4792 1.1 haad /* 4793 1.1 haad * We do not support links between attributes and non-attributes 4794 1.1 haad * because of the potential security risk of creating links 4795 1.1 haad * into "normal" file space in order to circumvent restrictions 4796 1.1 haad * imposed in attribute space. 4797 1.1 haad */ 4798 1.27 chs if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4799 1.1 haad ZFS_EXIT(zfsvfs); 4800 1.27 chs return (SET_ERROR(EINVAL)); 4801 1.1 haad } 4802 1.1 haad 4803 1.1 haad 4804 1.27 chs owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4805 1.27 chs if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4806 1.1 haad ZFS_EXIT(zfsvfs); 4807 1.27 chs return (SET_ERROR(EPERM)); 4808 1.1 haad } 4809 1.1 haad 4810 1.1 haad if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4811 1.1 haad ZFS_EXIT(zfsvfs); 4812 1.1 haad return (error); 4813 1.1 haad } 4814 1.1 haad 4815 1.1 haad /* 4816 1.1 haad * Attempt to lock directory; fail if entry already exists. 
4817 1.1 haad */ 4818 1.27 chs error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4819 1.1 haad if (error) { 4820 1.1 haad ZFS_EXIT(zfsvfs); 4821 1.1 haad return (error); 4822 1.1 haad } 4823 1.1 haad 4824 1.1 haad tx = dmu_tx_create(zfsvfs->z_os); 4825 1.27 chs dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4826 1.1 haad dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4827 1.27 chs zfs_sa_upgrade_txholds(tx, szp); 4828 1.27 chs zfs_sa_upgrade_txholds(tx, dzp); 4829 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 4830 1.1 haad if (error) { 4831 1.1 haad dmu_tx_abort(tx); 4832 1.1 haad ZFS_EXIT(zfsvfs); 4833 1.1 haad return (error); 4834 1.1 haad } 4835 1.1 haad 4836 1.27 chs error = zfs_link_create(dzp, name, szp, tx, 0); 4837 1.1 haad 4838 1.1 haad if (error == 0) { 4839 1.1 haad uint64_t txtype = TX_LINK; 4840 1.1 haad zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4841 1.1 haad } 4842 1.1 haad 4843 1.1 haad dmu_tx_commit(tx); 4844 1.1 haad 4845 1.1 haad if (error == 0) { 4846 1.1 haad vnevent_link(svp, ct); 4847 1.1 haad } 4848 1.1 haad 4849 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4850 1.27 chs zil_commit(zilog, 0); 4851 1.27 chs 4852 1.1 haad ZFS_EXIT(zfsvfs); 4853 1.1 haad return (error); 4854 1.1 haad } 4855 1.1 haad 4856 1.27 chs 4857 1.97 yamt #if !defined(__NetBSD__) 4858 1.2 haad /*ARGSUSED*/ 4859 1.27 chs void 4860 1.27 chs zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4861 1.27 chs { 4862 1.27 chs znode_t *zp = VTOZ(vp); 4863 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4864 1.27 chs int error; 4865 1.27 chs 4866 1.27 chs rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4867 1.27 chs if (zp->z_sa_hdl == NULL) { 4868 1.27 chs /* 4869 1.27 chs * The fs has been unmounted, or we did a 4870 1.27 chs * suspend/resume and this file no longer exists. 
4871 1.27 chs */ 4872 1.27 chs rw_exit(&zfsvfs->z_teardown_inactive_lock); 4873 1.27 chs vrecycle(vp); 4874 1.27 chs return; 4875 1.27 chs } 4876 1.27 chs 4877 1.27 chs if (zp->z_unlinked) { 4878 1.27 chs /* 4879 1.27 chs * Fast path to recycle a vnode of a removed file. 4880 1.27 chs */ 4881 1.27 chs rw_exit(&zfsvfs->z_teardown_inactive_lock); 4882 1.27 chs vrecycle(vp); 4883 1.27 chs return; 4884 1.27 chs } 4885 1.27 chs 4886 1.27 chs if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4887 1.27 chs dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4888 1.27 chs 4889 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4890 1.27 chs zfs_sa_upgrade_txholds(tx, zp); 4891 1.27 chs error = dmu_tx_assign(tx, TXG_WAIT); 4892 1.27 chs if (error) { 4893 1.27 chs dmu_tx_abort(tx); 4894 1.27 chs } else { 4895 1.27 chs (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4896 1.27 chs (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4897 1.27 chs zp->z_atime_dirty = 0; 4898 1.27 chs dmu_tx_commit(tx); 4899 1.27 chs } 4900 1.27 chs } 4901 1.27 chs rw_exit(&zfsvfs->z_teardown_inactive_lock); 4902 1.27 chs } 4903 1.97 yamt #endif /* !defined(__NetBSD__) */ 4904 1.2 haad 4905 1.27 chs 4906 1.27 chs #ifdef __FreeBSD__ 4907 1.27 chs CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4908 1.27 chs CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4909 1.27 chs #endif 4910 1.1 haad 4911 1.2 haad /*ARGSUSED*/ 4912 1.1 haad static int 4913 1.2 haad zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4914 1.1 haad { 4915 1.1 haad znode_t *zp = VTOZ(vp); 4916 1.1 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4917 1.2 haad uint32_t gen; 4918 1.27 chs uint64_t gen64; 4919 1.2 haad uint64_t object = zp->z_id; 4920 1.2 haad zfid_short_t *zfid; 4921 1.27 chs int size, i, error; 4922 1.2 haad 4923 1.2 haad ZFS_ENTER(zfsvfs); 4924 1.2 haad ZFS_VERIFY_ZP(zp); 4925 1.27 chs 4926 1.27 chs if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4927 1.27 chs &gen64, sizeof (uint64_t))) != 0) { 
4928 1.27 chs ZFS_EXIT(zfsvfs); 4929 1.27 chs return (error); 4930 1.27 chs } 4931 1.27 chs 4932 1.27 chs gen = (uint32_t)gen64; 4933 1.2 haad 4934 1.2 haad size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4935 1.27 chs 4936 1.27 chs #ifdef illumos 4937 1.27 chs if (fidp->fid_len < size) { 4938 1.27 chs fidp->fid_len = size; 4939 1.27 chs ZFS_EXIT(zfsvfs); 4940 1.27 chs return (SET_ERROR(ENOSPC)); 4941 1.27 chs } 4942 1.27 chs #else 4943 1.2 haad fidp->fid_len = size; 4944 1.27 chs #endif 4945 1.2 haad 4946 1.2 haad zfid = (zfid_short_t *)fidp; 4947 1.2 haad 4948 1.2 haad zfid->zf_len = size; 4949 1.1 haad 4950 1.2 haad for (i = 0; i < sizeof (zfid->zf_object); i++) 4951 1.2 haad zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4952 1.1 haad 4953 1.2 haad /* Must have a non-zero generation number to distinguish from .zfs */ 4954 1.2 haad if (gen == 0) 4955 1.2 haad gen = 1; 4956 1.2 haad for (i = 0; i < sizeof (zfid->zf_gen); i++) 4957 1.2 haad zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4958 1.1 haad 4959 1.2 haad if (size == LONG_FID_LEN) { 4960 1.2 haad uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4961 1.2 haad zfid_long_t *zlfid; 4962 1.1 haad 4963 1.2 haad zlfid = (zfid_long_t *)fidp; 4964 1.1 haad 4965 1.2 haad for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4966 1.2 haad zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4967 1.1 haad 4968 1.2 haad /* XXX - this should be the generation number for the objset */ 4969 1.2 haad for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4970 1.2 haad zlfid->zf_setgen[i] = 0; 4971 1.2 haad } 4972 1.2 haad 4973 1.2 haad ZFS_EXIT(zfsvfs); 4974 1.2 haad return (0); 4975 1.2 haad } 4976 1.1 haad 4977 1.1 haad static int 4978 1.27 chs zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4979 1.1 haad caller_context_t *ct) 4980 1.1 haad { 4981 1.27 chs znode_t *zp, *xzp; 4982 1.27 chs zfsvfs_t *zfsvfs; 4983 1.27 chs int error; 4984 1.4 haad 4985 1.27 chs switch (cmd) { 4986 1.27 chs case 
_PC_LINK_MAX: 4987 1.27 chs *valp = INT_MAX; 4988 1.27 chs return (0); 4989 1.4 haad 4990 1.27 chs case _PC_FILESIZEBITS: 4991 1.27 chs *valp = 64; 4992 1.2 haad return (0); 4993 1.27 chs #ifdef illumos 4994 1.27 chs case _PC_XATTR_EXISTS: 4995 1.27 chs zp = VTOZ(vp); 4996 1.27 chs zfsvfs = zp->z_zfsvfs; 4997 1.27 chs ZFS_ENTER(zfsvfs); 4998 1.27 chs ZFS_VERIFY_ZP(zp); 4999 1.27 chs *valp = 0; 5000 1.27 chs error = zfs_dirent_lookup(zp, "", &xzp, 5001 1.27 chs ZXATTR | ZEXISTS | ZSHARED); 5002 1.27 chs if (error == 0) { 5003 1.27 chs if (!zfs_dirempty(xzp)) 5004 1.27 chs *valp = 1; 5005 1.27 chs vrele(ZTOV(xzp)); 5006 1.27 chs } else if (error == ENOENT) { 5007 1.2 haad /* 5008 1.27 chs * If there aren't extended attributes, it's the 5009 1.27 chs * same as having zero of them. 5010 1.2 haad */ 5011 1.27 chs error = 0; 5012 1.2 haad } 5013 1.27 chs ZFS_EXIT(zfsvfs); 5014 1.27 chs return (error); 5015 1.1 haad 5016 1.27 chs case _PC_SATTR_ENABLED: 5017 1.27 chs case _PC_SATTR_EXISTS: 5018 1.27 chs *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5019 1.27 chs (vp->v_type == VREG || vp->v_type == VDIR); 5020 1.27 chs return (0); 5021 1.1 haad 5022 1.27 chs case _PC_ACCESS_FILTERING: 5023 1.27 chs *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5024 1.27 chs vp->v_type == VDIR; 5025 1.27 chs return (0); 5026 1.1 haad 5027 1.27 chs case _PC_ACL_ENABLED: 5028 1.27 chs *valp = _ACL_ACE_ENABLED; 5029 1.27 chs return (0); 5030 1.27 chs #endif /* illumos */ 5031 1.27 chs case _PC_MIN_HOLE_SIZE: 5032 1.27 chs *valp = (int)SPA_MINBLOCKSIZE; 5033 1.27 chs return (0); 5034 1.27 chs #ifdef illumos 5035 1.27 chs case _PC_TIMESTAMP_RESOLUTION: 5036 1.27 chs /* nanosecond timestamp resolution */ 5037 1.27 chs *valp = 1L; 5038 1.27 chs return (0); 5039 1.27 chs #endif 5040 1.27 chs case _PC_ACL_EXTENDED: 5041 1.27 chs *valp = 0; 5042 1.4 haad return (0); 5043 1.4 haad 5044 1.27 chs #ifndef __NetBSD__ 5045 1.27 chs case _PC_ACL_NFS4: 5046 1.27 chs *valp = 1; 
5047 1.4 haad return (0); 5048 1.4 haad 5049 1.27 chs case _PC_ACL_PATH_MAX: 5050 1.27 chs *valp = ACL_MAX_ENTRIES; 5051 1.4 haad return (0); 5052 1.27 chs #endif 5053 1.4 haad 5054 1.27 chs default: 5055 1.80 kardel return (EOPNOTSUPP); 5056 1.4 haad } 5057 1.4 haad } 5058 1.4 haad 5059 1.4 haad /*ARGSUSED*/ 5060 1.4 haad static int 5061 1.27 chs zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5062 1.4 haad caller_context_t *ct) 5063 1.4 haad { 5064 1.4 haad znode_t *zp = VTOZ(vp); 5065 1.4 haad zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5066 1.27 chs int error; 5067 1.27 chs boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5068 1.4 haad 5069 1.4 haad ZFS_ENTER(zfsvfs); 5070 1.4 haad ZFS_VERIFY_ZP(zp); 5071 1.27 chs error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5072 1.27 chs ZFS_EXIT(zfsvfs); 5073 1.4 haad 5074 1.4 haad return (error); 5075 1.4 haad } 5076 1.4 haad 5077 1.27 chs /*ARGSUSED*/ 5078 1.27 chs int 5079 1.27 chs zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5080 1.4 haad caller_context_t *ct) 5081 1.4 haad { 5082 1.27 chs znode_t *zp = VTOZ(vp); 5083 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5084 1.27 chs int error; 5085 1.27 chs boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5086 1.27 chs zilog_t *zilog = zfsvfs->z_log; 5087 1.4 haad 5088 1.4 haad ZFS_ENTER(zfsvfs); 5089 1.4 haad ZFS_VERIFY_ZP(zp); 5090 1.4 haad 5091 1.27 chs error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5092 1.4 haad 5093 1.27 chs if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5094 1.27 chs zil_commit(zilog, 0); 5095 1.4 haad 5096 1.4 haad ZFS_EXIT(zfsvfs); 5097 1.4 haad return (error); 5098 1.4 haad } 5099 1.4 haad 5100 1.4 haad static int 5101 1.27 chs ioflags(int ioflags) 5102 1.4 haad { 5103 1.27 chs int flags = 0; 5104 1.4 haad 5105 1.27 chs if (ioflags & IO_APPEND) 5106 1.27 chs flags |= FAPPEND; 5107 1.27 chs if (ioflags & IO_NDELAY) 5108 1.27 chs flags |= FNONBLOCK; 5109 1.27 chs if (ioflags & IO_SYNC) 5110 1.27 chs flags |= (FSYNC | FDSYNC | FRSYNC); 5111 1.4 haad 5112 1.27 chs return (flags); 5113 1.4 haad } 5114 1.4 haad 5115 1.27 chs #ifdef __NetBSD__ 5116 1.27 chs 5117 1.96 yamt static void zfs_netbsd_update_mctime(vnode_t *vp); 5118 1.96 yamt 5119 1.4 haad static int 5120 1.9 christos zfs_netbsd_open(void *v) 5121 1.4 haad { 5122 1.9 christos struct vop_open_args *ap = v; 5123 1.4 haad 5124 1.12 riastrad return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL)); 5125 1.4 haad } 5126 1.4 haad 5127 1.4 haad static int 5128 1.9 christos zfs_netbsd_close(void *v) 5129 1.4 haad { 5130 1.9 christos struct vop_close_args *ap = v; 5131 1.1 haad 5132 1.2 haad return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 5133 1.2 haad } 5134 1.1 haad 5135 1.2 haad static int 5136 1.9 christos zfs_netbsd_ioctl(void *v) 5137 1.2 haad { 5138 1.9 christos struct vop_ioctl_args *ap = v; 5139 1.1 haad 5140 1.2 haad return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5141 1.2 haad ap->a_fflag, ap->a_cred, NULL, NULL)); 5142 1.1 haad } 5143 1.1 haad 5144 1.2 haad 5145 1.1 haad static int 5146 1.9 christos zfs_netbsd_read(void *v) 5147 1.1 haad { 5148 1.9 christos struct vop_read_args *ap = v; 5149 1.48 hannken vnode_t *vp = ap->a_vp; 
5150 1.48 hannken znode_t *zp = VTOZ(vp); 5151 1.2 haad 5152 1.48 hannken switch (vp->v_type) { 5153 1.48 hannken case VBLK: 5154 1.48 hannken case VCHR: 5155 1.48 hannken ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5156 1.48 hannken return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); 5157 1.48 hannken case VFIFO: 5158 1.48 hannken ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5159 1.48 hannken return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); 5160 1.85 yamt case VREG: 5161 1.85 yamt break; 5162 1.85 yamt case VDIR: 5163 1.85 yamt /* 5164 1.85 yamt * Note: this is normal on NetBSD because it historically 5165 1.85 yamt * allows read() on a directory. 5166 1.85 yamt * We simply reject it here though because it doesn't make 5167 1.85 yamt * sense to allow read() unless we implement a conversion 5168 1.85 yamt * to the historical version of the UFS dirent structure, 5169 1.85 yamt * which i (yamt) don't think is worth the effort. 5170 1.85 yamt */ 5171 1.85 yamt return EISDIR; 5172 1.85 yamt default: 5173 1.85 yamt return EINVAL; 5174 1.48 hannken } 5175 1.48 hannken 5176 1.48 hannken return (zfs_read(vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); 5177 1.1 haad } 5178 1.1 haad 5179 1.1 haad static int 5180 1.9 christos zfs_netbsd_write(void *v) 5181 1.1 haad { 5182 1.9 christos struct vop_write_args *ap = v; 5183 1.48 hannken vnode_t *vp = ap->a_vp; 5184 1.52 hannken znode_t *zp = VTOZ(vp); 5185 1.52 hannken struct uio *uio = ap->a_uio; 5186 1.52 hannken off_t osize = zp->z_size; 5187 1.52 hannken int error, resid; 5188 1.1 haad 5189 1.48 hannken switch (vp->v_type) { 5190 1.48 hannken case VBLK: 5191 1.48 hannken case VCHR: 5192 1.96 yamt zfs_netbsd_update_mctime(vp); 5193 1.48 hannken return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); 5194 1.48 hannken case VFIFO: 5195 1.96 yamt zfs_netbsd_update_mctime(vp); 5196 1.48 hannken return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); 5197 1.85 yamt case VREG: 5198 1.85 yamt break; 5199 1.85 yamt case 
VDIR: 5200 1.85 yamt /* 5201 1.85 yamt * Note: this shouldn't happen as NetBSD's vn_openchk 5202 1.85 yamt * rejects FWRITE on VDIR. 5203 1.85 yamt */ 5204 1.85 yamt return EIO; 5205 1.85 yamt default: 5206 1.85 yamt return EINVAL; 5207 1.48 hannken } 5208 1.48 hannken 5209 1.52 hannken resid = uio->uio_resid; 5210 1.52 hannken error = zfs_write(vp, uio, ioflags(ap->a_ioflag), ap->a_cred, NULL); 5211 1.52 hannken 5212 1.52 hannken return error; 5213 1.1 haad } 5214 1.1 haad 5215 1.1 haad static int 5216 1.9 christos zfs_netbsd_access(void *v) 5217 1.1 haad { 5218 1.14 riastrad struct vop_access_args /* { 5219 1.14 riastrad struct vnode *a_vp; 5220 1.67 christos accmode_t a_accmode; 5221 1.14 riastrad kauth_cred_t a_cred; 5222 1.14 riastrad } */ *ap = v; 5223 1.81 hannken vnode_t *vp = ap->a_vp; 5224 1.81 hannken znode_t *zp = VTOZ(vp); 5225 1.81 hannken accmode_t accmode; 5226 1.14 riastrad kauth_cred_t cred = ap->a_cred; 5227 1.81 hannken int error = 0; 5228 1.1 haad 5229 1.1 haad /* 5230 1.81 hannken * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 5231 1.14 riastrad */ 5232 1.81 hannken accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 5233 1.81 hannken if (accmode != 0) 5234 1.81 hannken error = zfs_access(vp, accmode, 0, cred, NULL); 5235 1.1 haad 5236 1.81 hannken /* 5237 1.81 hannken * VADMIN has to be handled by kauth_authorize_vnode(). 
5238 1.81 hannken */ 5239 1.81 hannken if (error == 0) { 5240 1.81 hannken accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 5241 1.81 hannken if (accmode != 0) { 5242 1.81 hannken error = kauth_authorize_vnode(cred, 5243 1.81 hannken KAUTH_ACCESS_ACTION(accmode, vp->v_type, 5244 1.81 hannken zp->z_mode & ALLPERMS), vp, NULL, 5245 1.81 hannken genfs_can_access(vp, cred, zp->z_uid, 5246 1.81 hannken zp->z_gid, zp->z_mode & ALLPERMS, NULL, accmode)); 5247 1.81 hannken } 5248 1.81 hannken } 5249 1.81 hannken 5250 1.81 hannken /* 5251 1.81 hannken * For VEXEC, ensure that at least one execute bit is set for 5252 1.81 hannken * non-directories. 5253 1.81 hannken */ 5254 1.81 hannken if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 5255 1.81 hannken (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 5256 1.81 hannken error = EACCES; 5257 1.81 hannken } 5258 1.1 haad 5259 1.32 hannken /* We expect EACCES as common error. */ 5260 1.32 hannken if (error == EPERM) 5261 1.32 hannken error = EACCES; 5262 1.32 hannken 5263 1.81 hannken return error; 5264 1.2 haad } 5265 1.1 haad 5266 1.2 haad static int 5267 1.9 christos zfs_netbsd_lookup(void *v) 5268 1.2 haad { 5269 1.17 hannken struct vop_lookup_v2_args /* { 5270 1.12 riastrad struct vnode *a_dvp; 5271 1.12 riastrad struct vnode **a_vpp; 5272 1.12 riastrad struct componentname *a_cnp; 5273 1.12 riastrad } */ *ap = v; 5274 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5275 1.12 riastrad struct vnode **vpp = ap->a_vpp; 5276 1.2 haad struct componentname *cnp = ap->a_cnp; 5277 1.86 yamt znode_t *zdp = VTOZ(dvp); 5278 1.86 yamt zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 5279 1.37 hannken char *nm, short_nm[31]; 5280 1.12 riastrad int error; 5281 1.12 riastrad 5282 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5283 1.12 riastrad 5284 1.12 riastrad *vpp = NULL; 5285 1.12 riastrad 5286 1.12 riastrad /* 5287 1.12 riastrad * Do an access check before the cache lookup. 
zfs_lookup does 5288 1.12 riastrad * an access check too, but it's too scary to contemplate 5289 1.12 riastrad * injecting our namecache stuff into zfs internals. 5290 1.12 riastrad * 5291 1.12 riastrad * XXX Is this the correct access check? 5292 1.12 riastrad */ 5293 1.12 riastrad if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) 5294 1.12 riastrad goto out; 5295 1.12 riastrad 5296 1.12 riastrad /* 5297 1.12 riastrad * Check the namecache before entering zfs_lookup. 5298 1.12 riastrad * cache_lookup does the locking dance for us. 5299 1.12 riastrad */ 5300 1.91 yamt if (zfsvfs->z_use_namecache) { 5301 1.91 yamt if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 5302 1.91 yamt cnp->cn_nameiop, cnp->cn_flags, NULL, vpp)) { 5303 1.91 yamt return *vpp == NULL ? ENOENT : 0; 5304 1.91 yamt } 5305 1.27 chs } 5306 1.12 riastrad 5307 1.12 riastrad /* 5308 1.12 riastrad * zfs_lookup wants a null-terminated component name, but namei 5309 1.12 riastrad * gives us a pointer into the full pathname. 5310 1.12 riastrad */ 5311 1.37 hannken ASSERT(cnp->cn_namelen < PATH_MAX - 1); 5312 1.37 hannken if (cnp->cn_namelen + 1 > sizeof(short_nm)) 5313 1.37 hannken nm = PNBUF_GET(); 5314 1.37 hannken else 5315 1.37 hannken nm = short_nm; 5316 1.12 riastrad (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5317 1.12 riastrad 5318 1.44 hannken error = zfs_lookup(dvp, nm, vpp, 0, cnp, cnp->cn_nameiop, cnp->cn_cred); 5319 1.12 riastrad 5320 1.37 hannken if (nm != short_nm) 5321 1.37 hannken PNBUF_PUT(nm); 5322 1.37 hannken 5323 1.12 riastrad /* 5324 1.14 riastrad * Translate errors to match our namei insanity. Also, if the 5325 1.14 riastrad * caller wants to create an entry here, it's apparently our 5326 1.14 riastrad * responsibility as lookup to make sure that's permissible. 5327 1.14 riastrad * Go figure. 
5328 1.12 riastrad */ 5329 1.12 riastrad if (cnp->cn_flags & ISLASTCN) { 5330 1.12 riastrad switch (cnp->cn_nameiop) { 5331 1.12 riastrad case CREATE: 5332 1.12 riastrad case RENAME: 5333 1.12 riastrad if (error == ENOENT) { 5334 1.14 riastrad error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5335 1.14 riastrad if (error) 5336 1.14 riastrad break; 5337 1.12 riastrad error = EJUSTRETURN; 5338 1.12 riastrad break; 5339 1.12 riastrad } 5340 1.34 hannken break; 5341 1.12 riastrad case DELETE: 5342 1.34 hannken if (error == 0) { 5343 1.34 hannken error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5344 1.34 hannken if (error) { 5345 1.34 hannken VN_RELE(*vpp); 5346 1.34 hannken *vpp = NULL; 5347 1.34 hannken } 5348 1.34 hannken } 5349 1.12 riastrad break; 5350 1.12 riastrad } 5351 1.12 riastrad } 5352 1.12 riastrad 5353 1.12 riastrad if (error) { 5354 1.12 riastrad KASSERT(*vpp == NULL); 5355 1.12 riastrad goto out; 5356 1.12 riastrad } 5357 1.27 chs KASSERT(*vpp != NULL); 5358 1.12 riastrad 5359 1.12 riastrad if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { 5360 1.12 riastrad KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5361 1.12 riastrad KASSERT(dvp == *vpp); 5362 1.12 riastrad } else if ((cnp->cn_namelen == 2) && 5363 1.12 riastrad (cnp->cn_nameptr[0] == '.') && 5364 1.12 riastrad (cnp->cn_nameptr[1] == '.')) { 5365 1.12 riastrad KASSERT(cnp->cn_flags & ISDOTDOT); 5366 1.12 riastrad } else { 5367 1.12 riastrad KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5368 1.12 riastrad } 5369 1.12 riastrad 5370 1.12 riastrad out: 5371 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5372 1.12 riastrad 5373 1.12 riastrad /* 5374 1.12 riastrad * Insert name into cache if appropriate. 
5375 1.12 riastrad */ 5376 1.12 riastrad 5377 1.86 yamt if (zfsvfs->z_use_namecache) { 5378 1.86 yamt if (error == 0 || 5379 1.86 yamt (error == ENOENT && cnp->cn_nameiop != CREATE)) 5380 1.86 yamt cache_enter(dvp, *vpp, cnp->cn_nameptr, 5381 1.86 yamt cnp->cn_namelen, cnp->cn_flags); 5382 1.86 yamt } 5383 1.2 haad 5384 1.12 riastrad return (error); 5385 1.1 haad } 5386 1.1 haad 5387 1.1 haad static int 5388 1.9 christos zfs_netbsd_create(void *v) 5389 1.1 haad { 5390 1.16 hannken struct vop_create_v3_args /* { 5391 1.12 riastrad struct vnode *a_dvp; 5392 1.12 riastrad struct vnode **a_vpp; 5393 1.12 riastrad struct componentname *a_cnp; 5394 1.12 riastrad struct vattr *a_vap; 5395 1.12 riastrad } */ *ap = v; 5396 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5397 1.12 riastrad struct vnode **vpp = ap->a_vpp; 5398 1.2 haad struct componentname *cnp = ap->a_cnp; 5399 1.12 riastrad struct vattr *vap = ap->a_vap; 5400 1.37 hannken char *nm; 5401 1.2 haad int mode; 5402 1.12 riastrad int error; 5403 1.12 riastrad 5404 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5405 1.12 riastrad 5406 1.2 haad vattr_init_mask(vap); 5407 1.2 haad mode = vap->va_mode & ALLPERMS; 5408 1.1 haad 5409 1.37 hannken /* ZFS wants a null-terminated name. */ 5410 1.37 hannken nm = PNBUF_GET(); 5411 1.37 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5412 1.37 hannken 5413 1.12 riastrad /* XXX !EXCL is wrong here... 
*/ 5414 1.37 hannken error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5415 1.37 hannken 5416 1.37 hannken PNBUF_PUT(nm); 5417 1.12 riastrad 5418 1.16 hannken KASSERT((error == 0) == (*vpp != NULL)); 5419 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5420 1.53 brad if (*vpp != NULL) 5421 1.53 brad VOP_UNLOCK(*vpp, 0); 5422 1.12 riastrad 5423 1.12 riastrad return (error); 5424 1.2 haad } 5425 1.1 haad 5426 1.2 haad static int 5427 1.48 hannken zfs_netbsd_mknod(void *v) 5428 1.48 hannken { 5429 1.48 hannken struct vop_mknod_v3_args /* { 5430 1.48 hannken struct vnode *a_dvp; 5431 1.48 hannken struct vnode **a_vpp; 5432 1.48 hannken struct componentname *a_cnp; 5433 1.48 hannken struct vattr *a_vap; 5434 1.48 hannken } */ *ap = v; 5435 1.48 hannken struct vnode *dvp = ap->a_dvp; 5436 1.48 hannken struct vnode **vpp = ap->a_vpp; 5437 1.48 hannken struct componentname *cnp = ap->a_cnp; 5438 1.48 hannken struct vattr *vap = ap->a_vap; 5439 1.48 hannken char *nm; 5440 1.48 hannken int mode; 5441 1.48 hannken int error; 5442 1.48 hannken 5443 1.48 hannken KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5444 1.48 hannken 5445 1.48 hannken vattr_init_mask(vap); 5446 1.48 hannken mode = vap->va_mode & ALLPERMS; 5447 1.48 hannken 5448 1.48 hannken /* ZFS wants a null-terminated name. */ 5449 1.48 hannken nm = PNBUF_GET(); 5450 1.48 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5451 1.48 hannken 5452 1.48 hannken /* XXX !EXCL is wrong here... 
*/ 5453 1.48 hannken error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5454 1.48 hannken 5455 1.48 hannken PNBUF_PUT(nm); 5456 1.48 hannken 5457 1.48 hannken KASSERT((error == 0) == (*vpp != NULL)); 5458 1.48 hannken KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5459 1.53 brad if (*vpp != NULL) 5460 1.53 brad VOP_UNLOCK(*vpp, 0); 5461 1.48 hannken 5462 1.48 hannken return (error); 5463 1.48 hannken } 5464 1.48 hannken
/*
 * zfs_netbsd_remove: remove-vnode-operation glue (vop_remove_v3_args).
 *
 * Both dvp and vp must be exclusively locked on entry (asserted).  The
 * component name is copied into a NUL-terminated pathname buffer and passed
 * to zfs_remove() together with the caller's credentials.
 *
 * Regardless of the result, the pathname buffer is released and vp is
 * vput(); dvp is asserted to still be exclusively locked.  The
 * ctx_vp_new_nlink field of the argument structure is not written here.
 * Returns the error code from zfs_remove().
 */
5465 1.48 hannken static int 5466 1.9 christos zfs_netbsd_remove(void *v) 5467 1.2 haad { 5468 1.76 thorpej struct vop_remove_v3_args /* { 5469 1.12 riastrad struct vnode *a_dvp; 5470 1.12 riastrad struct vnode *a_vp; 5471 1.12 riastrad struct componentname *a_cnp; 5472 1.76 thorpej nlink_t ctx_vp_new_nlink; 5473 1.12 riastrad } */ *ap = v; 5474 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5475 1.12 riastrad struct vnode *vp = ap->a_vp; 5476 1.12 riastrad struct componentname *cnp = ap->a_cnp; 5477 1.37 hannken char *nm; 5478 1.12 riastrad int error; 5479 1.12 riastrad 5480 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5481 1.12 riastrad KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5482 1.12 riastrad 5483 1.37 hannken /* ZFS wants a null-terminated name. */ 5484 1.37 hannken nm = PNBUF_GET(); 5485 1.37 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5486 1.37 hannken 5487 1.37 hannken error = zfs_remove(dvp, vp, nm, cnp->cn_cred); 5488 1.37 hannken 5489 1.76 thorpej /* 5490 1.76 thorpej * XXX Should update ctx_vp_new_nlink, but for now the 5491 1.76 thorpej * XXX the kevent sent on "vp" matches historical behavior. 
5492 1.76 thorpej */ 5493 1.76 thorpej 5494 1.37 hannken PNBUF_PUT(nm); 5495 1.27 chs vput(vp); 5496 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5497 1.12 riastrad return (error); 5498 1.2 haad } 5499 1.1 haad
/*
 * zfs_netbsd_mkdir: mkdir-vnode-operation glue (vop_mkdir_v3_args).
 *
 * Asserts that dvp is exclusively locked, initialises the vattr mask,
 * copies the component name into a NUL-terminated pathname buffer and calls
 * zfs_mkdir() with the caller's credentials.
 *
 * On return *vpp is non-NULL exactly when error == 0 (asserted); a new
 * vnode is unlocked before returning and dvp is asserted to still be
 * exclusively locked.  Returns the error code from zfs_mkdir().
 */
5500 1.2 haad static int 5501 1.9 christos zfs_netbsd_mkdir(void *v) 5502 1.2 haad { 5503 1.16 hannken struct vop_mkdir_v3_args /* { 5504 1.12 riastrad struct vnode *a_dvp; 5505 1.12 riastrad struct vnode **a_vpp; 5506 1.12 riastrad struct componentname *a_cnp; 5507 1.12 riastrad struct vattr *a_vap; 5508 1.12 riastrad } */ *ap = v; 5509 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5510 1.12 riastrad struct vnode **vpp = ap->a_vpp; 5511 1.12 riastrad struct componentname *cnp = ap->a_cnp; 5512 1.12 riastrad struct vattr *vap = ap->a_vap; 5513 1.37 hannken char *nm; 5514 1.12 riastrad int error; 5515 1.12 riastrad 5516 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5517 1.12 riastrad 5518 1.2 haad vattr_init_mask(vap); 5519 1.1 haad 5520 1.37 hannken /* ZFS wants a null-terminated name. 
*/ 5521 1.37 hannken nm = PNBUF_GET(); 5522 1.37 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5523 1.37 hannken 5524 1.37 hannken error = zfs_mkdir(dvp, nm, vap, vpp, cnp->cn_cred); 5525 1.37 hannken 5526 1.37 hannken PNBUF_PUT(nm); 5527 1.12 riastrad 5528 1.16 hannken KASSERT((error == 0) == (*vpp != NULL)); 5529 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5530 1.53 brad if (*vpp != NULL) 5531 1.53 brad VOP_UNLOCK(*vpp, 0); 5532 1.12 riastrad 5533 1.12 riastrad return (error); 5534 1.1 haad } 5535 1.1 haad
/*
 * zfs_netbsd_rmdir: rmdir-vnode-operation glue (vop_rmdir_v2_args).
 *
 * Both dvp and vp must be exclusively locked on entry (asserted).  The
 * component name is copied into a NUL-terminated pathname buffer and passed
 * to zfs_rmdir() together with the caller's credentials.
 *
 * Regardless of the result, the pathname buffer is released and vp is
 * vput(); dvp is asserted to still be exclusively locked.  Returns the
 * error code from zfs_rmdir().
 */
5536 1.1 haad static int 5537 1.9 christos zfs_netbsd_rmdir(void *v) 5538 1.1 haad { 5539 1.25 riastrad struct vop_rmdir_v2_args /* { 5540 1.12 riastrad struct vnode *a_dvp; 5541 1.12 riastrad struct vnode *a_vp; 5542 1.12 riastrad struct componentname *a_cnp; 5543 1.12 riastrad } */ *ap = v; 5544 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5545 1.12 riastrad struct vnode *vp = ap->a_vp; 5546 1.2 haad struct componentname *cnp = ap->a_cnp; 5547 1.37 hannken char *nm; 5548 1.12 riastrad int error; 5549 1.12 riastrad 5550 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5551 1.12 riastrad KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5552 1.12 riastrad 5553 1.37 hannken /* ZFS wants a null-terminated name. 
*/ 5554 1.37 hannken nm = PNBUF_GET(); 5555 1.37 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5556 1.37 hannken 5557 1.37 hannken error = zfs_rmdir(dvp, vp, nm, cnp->cn_cred); 5558 1.37 hannken 5559 1.37 hannken PNBUF_PUT(nm); 5560 1.27 chs vput(vp); 5561 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5562 1.12 riastrad return error; 5563 1.2 haad } 5564 1.1 haad
/*
 * zfs_netbsd_readdir: thin wrapper that unpacks vop_readdir_args and
 * forwards the vnode, uio, credentials, EOF flag pointer and cookie
 * arguments to zfs_readdir(), returning its result unchanged.
 */
5565 1.2 haad static int 5566 1.9 christos zfs_netbsd_readdir(void *v) 5567 1.2 haad { 5568 1.9 christos struct vop_readdir_args *ap = v; 5569 1.1 haad 5570 1.2 haad return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5571 1.27 chs ap->a_ncookies, ap->a_cookies)); 5572 1.2 haad } 5573 1.1 haad
/*
 * zfs_netbsd_fsync: fsync-vnode-operation glue (vop_fsync_args).
 *
 * Step 1: take the vnode's vmobjlock as writer and call VOP_PUTPAGES() with
 * PGO_CLEANIT over [trunc_page(a_offlo), round_page(a_offhi)).  A non-zero
 * result is returned immediately.
 * NOTE(review): this function never calls rw_exit() on vmobjlock itself;
 * presumably VOP_PUTPAGES() releases it on every path -- confirm.
 *
 * Step 2: when a_flags contains FSYNC_RECLAIM or FSYNC_LAZY, return 0
 * without calling zfs_fsync(); otherwise return zfs_fsync()'s result.
 */
5574 1.2 haad static int 5575 1.9 christos zfs_netbsd_fsync(void *v) 5576 1.2 haad { 5577 1.9 christos struct vop_fsync_args *ap = v; 5578 1.93 yamt struct vnode *vp = ap->a_vp; 5579 1.93 yamt int flags = ap->a_flags; 5580 1.93 yamt int error; 5581 1.93 yamt 5582 1.93 yamt /* 5583 1.93 yamt * Regardless of whether this is required for standards conformance, 5584 1.93 yamt * this is the logical behavior when fsync() is called on a file with 5585 1.93 yamt * dirty pages. We use async putpages since the ZIL transactions are 5586 1.93 yamt * already going to be pushed out as part of the zil_commit(). 5587 1.93 yamt */ 5588 1.93 yamt rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 5589 1.93 yamt error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), 5590 1.93 yamt round_page(ap->a_offhi), PGO_CLEANIT); 5591 1.93 yamt if (error != 0) { 5592 1.93 yamt return error; 5593 1.93 yamt } 5594 1.1 haad 5595 1.92 yamt /* 5596 1.92 yamt * it isn't safe or necessary to call zil_commit when reclaiming 5597 1.92 yamt * a vnode. 5598 1.92 yamt * 5599 1.92 yamt * - it can deadlock by attempting vcache_get on itself. 5600 1.92 yamt * (zfs_get_data) 5601 1.92 yamt * 5602 1.92 yamt * - for the purpose of vnode reclaim, we only need to push the 5603 1.92 yamt * data to the txg. no need to log the intent. 
5604 1.94 yamt * 5605 1.94 yamt * no need to commit the zil for ioflush either. (FSYNC_LAZY) 5606 1.92 yamt */ 5607 1.94 yamt if ((flags & (FSYNC_RECLAIM|FSYNC_LAZY)) != 0) { 5608 1.92 yamt return (0); 5609 1.92 yamt } 5610 1.92 yamt 5611 1.93 yamt return (zfs_fsync(vp, flags, ap->a_cred, NULL)); 5612 1.1 haad } 5613 1.1 haad
/*
 * zfs_spec_fsync: fsync for the special-file vnode path.  Calls
 * spec_fsync() with the unmodified argument block first; if that fails its
 * error is returned, otherwise the result of zfs_fsync() on the same vnode,
 * flags and credentials is returned.
 */
5614 1.1 haad static int 5615 1.61 riastrad zfs_spec_fsync(void *v) 5616 1.61 riastrad { 5617 1.61 riastrad struct vop_fsync_args *ap = v; 5618 1.61 riastrad int error; 5619 1.61 riastrad 5620 1.61 riastrad error = spec_fsync(v); 5621 1.61 riastrad if (error) 5622 1.61 riastrad return error; 5623 1.61 riastrad 5624 1.61 riastrad return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL)); 5625 1.61 riastrad } 5626 1.61 riastrad
/*
 * zfs_netbsd_getattr: getattr-vnode-operation glue (vop_getattr_args).
 *
 * Copies the caller's vattr into a local xvattr, sets AT_XVATTR in its
 * mask and requests the IMMUTABLE, APPENDONLY, NOUNLINK and NODUMP optional
 * attributes before calling zfs_getattr().  A non-zero result is returned
 * as is.
 *
 * On success each optional attribute that was both returned and non-zero
 * is translated to its chflags bit (SF_IMMUTABLE, SF_APPEND, SF_NOUNLINK,
 * UF_NODUMP).  The xvattr's embedded vattr is copied back to the caller and
 * va_flags is overwritten with the translated bits.  Returns 0.
 */
5627 1.61 riastrad static int 5628 1.9 christos zfs_netbsd_getattr(void *v) 5629 1.1 haad { 5630 1.9 christos struct vop_getattr_args *ap = v; 5631 1.2 haad vattr_t *vap = ap->a_vap; 5632 1.2 haad xvattr_t xvap; 5633 1.2 haad u_long fflags = 0; 5634 1.2 haad int error; 5635 1.2 haad 5636 1.2 haad xva_init(&xvap); 5637 1.2 haad xvap.xva_vattr = *vap; 5638 1.2 haad xvap.xva_vattr.va_mask |= AT_XVATTR; 5639 1.2 haad 5640 1.2 haad /* Convert chflags into ZFS-type flags. */ 5641 1.2 haad /* XXX: what about SF_SETTABLE?. */ 5642 1.2 haad XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5643 1.2 haad XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5644 1.2 haad XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5645 1.2 haad XVA_SET_REQ(&xvap, XAT_NODUMP); 5646 1.2 haad error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5647 1.2 haad if (error != 0) 5648 1.2 haad return (error); 5649 1.1 haad 5650 1.2 haad /* Convert ZFS xattr into chflags. 
*/ 5651 1.2 haad #define FLAG_CHECK(fflag, xflag, xfield) do { \ 5652 1.2 haad if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5653 1.2 haad fflags |= (fflag); \ 5654 1.2 haad } while (0) 5655 1.2 haad FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5656 1.2 haad xvap.xva_xoptattrs.xoa_immutable); 5657 1.2 haad FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5658 1.2 haad xvap.xva_xoptattrs.xoa_appendonly); 5659 1.2 haad FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5660 1.2 haad xvap.xva_xoptattrs.xoa_nounlink); 5661 1.2 haad FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5662 1.2 haad xvap.xva_xoptattrs.xoa_nodump); 5663 1.2 haad #undef FLAG_CHECK 5664 1.2 haad *vap = xvap.xva_vattr; 5665 1.2 haad vap->va_flags = fflags; 5666 1.1 haad return (0); 5667 1.1 haad } 5668 1.1 haad
/*
 * zfs_netbsd_setattr: setattr-vnode-operation glue (vop_setattr_args).
 *
 * Preparation: initialises the vattr mask, clears AT_NOSET from it, maps
 * VA_UTIMES_NULL to ATTR_UTIME, copies the vattr into a local xvattr and
 * drops AT_SIZE from that copy for VBLK and VCHR vnodes.
 *
 * chflags (va_flags != VNOVAL): any bit other than SF_IMMUTABLE, SF_APPEND,
 * SF_NOUNLINK or UF_NODUMP yields EOPNOTSUPP.  FLAG_CHANGE requests an
 * optional attribute only when the requested bit differs from the znode's
 * current z_pflags, and records changed SF_SETTABLE bits in sfflags.  The
 * change is then authorised through kauth_authorize_vnode() with
 * genfs_can_chflags() as the fallback decision.
 * NOTE(review): this branch declares its own 'int error', shadowing the
 * function-level variable of the same name.
 *
 * Timestamps: if atime, mtime or birthtime is being set, the change is
 * authorised with KAUTH_VNODE_WRITE_TIMES and genfs_can_chtimes().
 *
 * Finally zfs_setattr() is called; on success, and when the mount has
 * z_use_namecache set, the cached identity for the vnode is refreshed with
 * cache_enter_id().  Returns 0 or the first error encountered.
 */
5669 1.1 haad static int 5670 1.9 christos zfs_netbsd_setattr(void *v) 5671 1.1 haad { 5672 1.9 christos struct vop_setattr_args *ap = v; 5673 1.2 haad vnode_t *vp = ap->a_vp; 5674 1.2 haad vattr_t *vap = ap->a_vap; 5675 1.2 haad cred_t *cred = ap->a_cred; 5676 1.33 hannken znode_t *zp = VTOZ(vp); 5677 1.86 yamt zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5678 1.2 haad xvattr_t xvap; 5679 1.35 hannken kauth_action_t action; 5680 1.35 hannken u_long fflags, sfflags = 0; 5681 1.2 haad uint64_t zflags; 5682 1.33 hannken int error, flags = 0; 5683 1.35 hannken bool changing_sysflags; 5684 1.1 haad 5685 1.2 haad vattr_init_mask(vap); 5686 1.2 haad vap->va_mask &= ~AT_NOSET; 5687 1.21 riastrad if (ISSET(vap->va_vaflags, VA_UTIMES_NULL)) 5688 1.21 riastrad flags |= ATTR_UTIME; 5689 1.1 haad 5690 1.2 haad xva_init(&xvap); 5691 1.2 haad xvap.xva_vattr = *vap; 5692 1.1 haad 5693 1.27 chs zflags = VTOZ(vp)->z_pflags; 5694 1.1 haad 5695 1.57 hannken /* Ignore size changes on device nodes. 
*/ 5696 1.57 hannken if (vp->v_type == VBLK || vp->v_type == VCHR) 5697 1.57 hannken xvap.xva_vattr.va_mask &= ~AT_SIZE; 5698 1.2 haad if (vap->va_flags != VNOVAL) { 5699 1.2 haad int error; 5700 1.1 haad 5701 1.2 haad fflags = vap->va_flags; 5702 1.2 haad if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 5703 1.2 haad return (EOPNOTSUPP); 5704 1.1 haad 5705 1.2 haad #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5706 1.2 haad if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5707 1.2 haad ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5708 1.2 haad XVA_SET_REQ(&xvap, (xflag)); \ 5709 1.2 haad (xfield) = ((fflags & (fflag)) != 0); \ 5710 1.35 hannken if (((fflag) & SF_SETTABLE) != 0) \ 5711 1.35 hannken sfflags |= (fflag); \ 5712 1.2 haad } \ 5713 1.2 haad } while (0) 5714 1.2 haad /* Convert chflags into ZFS-type flags. */ 5715 1.2 haad /* XXX: what about SF_SETTABLE?. */ 5716 1.2 haad FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5717 1.2 haad xvap.xva_xoptattrs.xoa_immutable); 5718 1.2 haad FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5719 1.2 haad xvap.xva_xoptattrs.xoa_appendonly); 5720 1.2 haad FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5721 1.2 haad xvap.xva_xoptattrs.xoa_nounlink); 5722 1.2 haad FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5723 1.2 haad xvap.xva_xoptattrs.xoa_nodump); 5724 1.2 haad #undef FLAG_CHANGE 5725 1.35 hannken 5726 1.35 hannken action = KAUTH_VNODE_WRITE_FLAGS; 5727 1.35 hannken changing_sysflags = false; 5728 1.35 hannken 5729 1.35 hannken if (zflags & (ZFS_IMMUTABLE|ZFS_APPENDONLY|ZFS_NOUNLINK)) { 5730 1.35 hannken action |= KAUTH_VNODE_HAS_SYSFLAGS; 5731 1.35 hannken } 5732 1.35 hannken if (sfflags != 0) { 5733 1.35 hannken action |= KAUTH_VNODE_WRITE_SYSFLAGS; 5734 1.35 hannken changing_sysflags = true; 5735 1.35 hannken } 5736 1.35 hannken 5737 1.35 hannken error = kauth_authorize_vnode(cred, action, vp, NULL, 5738 1.67 christos genfs_can_chflags(vp, cred, 
zp->z_uid, changing_sysflags)); 5739 1.35 hannken if (error) 5740 1.35 hannken return error; 5741 1.1 haad } 5742 1.33 hannken 5743 1.33 hannken if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || 5744 1.33 hannken vap->va_birthtime.tv_sec != VNOVAL) { 5745 1.33 hannken error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, 5746 1.67 christos NULL, genfs_can_chtimes(vp, cred, zp->z_uid, 5747 1.67 christos vap->va_vaflags)); 5748 1.33 hannken if (error) 5749 1.33 hannken return error; 5750 1.33 hannken } 5751 1.33 hannken 5752 1.52 hannken error = zfs_setattr(vp, (vattr_t *)&xvap, flags, cred, NULL); 5753 1.70 hannken if (error) 5754 1.70 hannken return error; 5755 1.70 hannken 5756 1.86 yamt if (zfsvfs->z_use_namecache) 5757 1.86 yamt cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, true); 5758 1.52 hannken 5759 1.52 hannken return error; 5760 1.2 haad } 5761 1.1 haad
/*
 * zfs_netbsd_rename: rename-vnode-operation glue (vop_rename_args).
 *
 * Entry state (asserted): tdvp is exclusively locked, tvp is exclusively
 * locked when non-NULL, and both fdvp and tdvp are directories.  The
 * credentials are taken from the source componentname and asserted to
 * uid-match those of the target componentname.
 *
 * Sequence:
 *   1. Unlock tdvp, and tvp when it exists and is not tdvp.
 *   2. Drop the references on fvp and tvp and reset both to NULL;
 *      zfs_rename() receives pointers to them and performs the work.
 *   3. After zfs_rename() returns, release fdvp and tdvp, plus whatever
 *      vnodes zfs_rename() left in fvp and tvp.
 *
 * Returns the error code from zfs_rename().
 */
5762 1.2 haad static int 5763 1.9 christos zfs_netbsd_rename(void *v) 5764 1.9 christos { 5765 1.76 thorpej struct vop_rename_args /* { 5766 1.2 haad struct vnode *a_fdvp; 5767 1.2 haad struct vnode *a_fvp; 5768 1.2 haad struct componentname *a_fcnp; 5769 1.2 haad struct vnode *a_tdvp; 5770 1.2 haad struct vnode *a_tvp; 5771 1.2 haad struct componentname *a_tcnp; 5772 1.9 christos } */ *ap = v; 5773 1.2 haad vnode_t *fdvp = ap->a_fdvp; 5774 1.2 haad vnode_t *fvp = ap->a_fvp; 5775 1.12 riastrad struct componentname *fcnp = ap->a_fcnp; 5776 1.2 haad vnode_t *tdvp = ap->a_tdvp; 5777 1.2 haad vnode_t *tvp = ap->a_tvp; 5778 1.12 riastrad struct componentname *tcnp = ap->a_tcnp; 5779 1.12 riastrad kauth_cred_t cred; 5780 1.2 haad int error; 5781 1.1 haad 5782 1.12 riastrad KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 5783 1.27 chs KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 5784 1.12 riastrad KASSERT(fdvp->v_type == VDIR); 5785 1.12 riastrad KASSERT(tdvp->v_type == VDIR); 5786 1.12 riastrad 5787 1.12 riastrad cred = fcnp->cn_cred; 5788 1.12 riastrad 5789 1.12 
riastrad /* 5790 1.12 riastrad * XXX Want a better equality test. `tcnp->cn_cred == cred' 5791 1.12 riastrad * hoses p2k because puffs transmits the creds separately and 5792 1.12 riastrad * allocates distinct but equivalent structures for them. 5793 1.12 riastrad */ 5794 1.12 riastrad KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred)); 5795 1.12 riastrad 5796 1.12 riastrad /* 5797 1.12 riastrad * Drop the insane locks. 5798 1.12 riastrad */ 5799 1.27 chs VOP_UNLOCK(tdvp, 0); 5800 1.27 chs if (tvp != NULL && tvp != tdvp) 5801 1.27 chs VOP_UNLOCK(tvp, 0); 5802 1.12 riastrad 5803 1.12 riastrad /* 5804 1.12 riastrad * Release the source and target nodes; zfs_rename will look 5805 1.12 riastrad * them up again once the locking situation is sane. 5806 1.12 riastrad */ 5807 1.12 riastrad VN_RELE(fvp); 5808 1.12 riastrad if (tvp != NULL) 5809 1.12 riastrad VN_RELE(tvp); 5810 1.27 chs fvp = NULL; 5811 1.27 chs tvp = NULL; 5812 1.1 haad 5813 1.12 riastrad /* 5814 1.12 riastrad * Do the rename ZFSly. 5815 1.12 riastrad */ 5816 1.27 chs error = zfs_rename(fdvp, &fvp, fcnp, tdvp, &tvp, tcnp, cred); 5817 1.12 riastrad 5818 1.12 riastrad /* 5819 1.12 riastrad * Release the directories now too, because the VOP_RENAME 5820 1.12 riastrad * protocol is insane. 
5821 1.12 riastrad */ 5822 1.27 chs 5823 1.2 haad VN_RELE(fdvp); 5824 1.12 riastrad VN_RELE(tdvp); 5825 1.31 hannken if (fvp != NULL) 5826 1.31 hannken VN_RELE(fvp); 5827 1.27 chs if (tvp != NULL) 5828 1.27 chs VN_RELE(tvp); 5829 1.1 haad 5830 1.1 haad return (error); 5831 1.1 haad } 5832 1.1 haad
/*
 * zfs_netbsd_symlink: symlink-vnode-operation glue (vop_symlink_v3_args).
 *
 * Asserts that dvp is exclusively locked, forces va_type to VLNK,
 * initialises the vattr mask, copies the component name into a
 * NUL-terminated pathname buffer and calls zfs_symlink() with the link
 * target string and the caller's credentials.
 *
 * On return *vpp is non-NULL exactly when error == 0 (asserted); a new
 * vnode is unlocked before returning and dvp is asserted to still be
 * exclusively locked.  Returns the error code from zfs_symlink().
 */
5833 1.1 haad static int 5834 1.9 christos zfs_netbsd_symlink(void *v) 5835 1.1 haad { 5836 1.16 hannken struct vop_symlink_v3_args /* { 5837 1.12 riastrad struct vnode *a_dvp; 5838 1.12 riastrad struct vnode **a_vpp; 5839 1.12 riastrad struct componentname *a_cnp; 5840 1.12 riastrad struct vattr *a_vap; 5841 1.12 riastrad char *a_target; 5842 1.12 riastrad } */ *ap = v; 5843 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5844 1.12 riastrad struct vnode **vpp = ap->a_vpp; 5845 1.2 haad struct componentname *cnp = ap->a_cnp; 5846 1.12 riastrad struct vattr *vap = ap->a_vap; 5847 1.12 riastrad char *target = ap->a_target; 5848 1.37 hannken char *nm; 5849 1.12 riastrad int error; 5850 1.12 riastrad 5851 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5852 1.12 riastrad 5853 1.2 haad vap->va_type = VLNK; /* Netbsd: Syscall only sets va_mode. */ 5854 1.2 haad vattr_init_mask(vap); 5855 1.1 haad 5856 1.37 hannken /* ZFS wants a null-terminated name. 
*/ 5857 1.37 hannken nm = PNBUF_GET(); 5858 1.37 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5859 1.37 hannken 5860 1.37 hannken error = zfs_symlink(dvp, vpp, nm, vap, target, cnp->cn_cred, 0); 5861 1.37 hannken 5862 1.37 hannken PNBUF_PUT(nm); 5863 1.16 hannken KASSERT((error == 0) == (*vpp != NULL)); 5864 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5865 1.53 brad if (*vpp != NULL) 5866 1.53 brad VOP_UNLOCK(*vpp, 0); 5867 1.12 riastrad 5868 1.12 riastrad return (error); 5869 1.4 haad } 5870 1.4 haad
/*
 * zfs_netbsd_readlink: thin wrapper that unpacks vop_readlink_args and
 * forwards the vnode, uio and credentials to zfs_readlink(), returning its
 * result unchanged.
 */
5871 1.2 haad static int 5872 1.9 christos zfs_netbsd_readlink(void *v) 5873 1.2 haad { 5874 1.9 christos struct vop_readlink_args *ap = v; 5875 1.1 haad 5876 1.2 haad return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5877 1.2 haad } 5878 1.1 haad
/*
 * zfs_netbsd_link: link-vnode-operation glue (vop_link_v2_args).
 *
 * Asserts that dvp is exclusively locked and copies the component name into
 * a NUL-terminated pathname buffer.  vp is then locked exclusively with
 * vn_lock(); if that fails the buffer is released and the lock error is
 * returned.
 *
 * With vp locked, the operation is authorised through
 * kauth_authorize_vnode() using KAUTH_VNODE_ADD_LINK, and only if that
 * succeeds is zfs_link() called.  Both outcomes pass through the 'out'
 * label, which releases the buffer and unlocks vp.  Returns the first
 * error encountered, or the result of zfs_link().
 */
5879 1.2 haad static int 5880 1.9 christos zfs_netbsd_link(void *v) 5881 1.2 haad { 5882 1.23 riastrad struct vop_link_v2_args /* { 5883 1.12 riastrad struct vnode *a_dvp; 5884 1.12 riastrad struct vnode *a_vp; 5885 1.12 riastrad struct componentname *a_cnp; 5886 1.12 riastrad } */ *ap = v; 5887 1.12 riastrad struct vnode *dvp = ap->a_dvp; 5888 1.12 riastrad struct vnode *vp = ap->a_vp; 5889 1.2 haad struct componentname *cnp = ap->a_cnp; 5890 1.37 hannken char *nm; 5891 1.12 riastrad int error; 5892 1.12 riastrad 5893 1.12 riastrad KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5894 1.1 haad 5895 1.37 hannken /* ZFS wants a null-terminated name. */ 5896 1.37 hannken nm = PNBUF_GET(); 5897 1.37 hannken (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5898 1.37 hannken 5899 1.78 christos if ((error = vn_lock(vp, LK_EXCLUSIVE)) != 0) { 5900 1.78 christos /* XXX: No ABORTOP? 
*/ 5901 1.78 christos PNBUF_PUT(nm); 5902 1.78 christos return error; 5903 1.78 christos } 5904 1.78 christos error = kauth_authorize_vnode(cnp->cn_cred, KAUTH_VNODE_ADD_LINK, vp, 5905 1.78 christos dvp, 0); 5906 1.78 christos if (error) 5907 1.78 christos goto out; 5908 1.37 hannken error = zfs_link(dvp, vp, nm, cnp->cn_cred, 5909 1.27 chs NULL, 0); 5910 1.37 hannken 5911 1.78 christos out: 5912 1.37 hannken PNBUF_PUT(nm); 5913 1.27 chs VOP_UNLOCK(vp, 0); 5914 1.27 chs return error; 5915 1.2 haad } 5916 1.1 haad
/*
 * zfs_netbsd_inactive: inactive-vnode-operation glue
 * (vop_inactive_v2_args).
 *
 * All work happens with z_teardown_inactive_lock held as reader.
 *   - z_sa_hdl == NULL: drop the lock, set *a_recycle to true, return 0.
 *   - z_unlinked set:   drop the lock, set *a_recycle to true, return 0.
 *   - otherwise, when z_atime_dirty is set, the cached atime is written
 *     back with sa_update() inside a DMU transaction assigned with
 *     TXG_WAIT.  If the assignment fails the transaction is aborted and
 *     z_atime_dirty is left set; the sa_update() result is ignored.
 *     The lock is then dropped and *a_recycle is set to false.
 *
 * NOTE(review): the 'zp->z_unlinked == 0' term in the atime test is always
 * true at that point because the unlinked case returned just above.
 *
 * Always returns 0.
 */
5917 1.2 haad static int 5918 1.9 christos zfs_netbsd_inactive(void *v) 5919 1.2 haad { 5920 1.24 riastrad struct vop_inactive_v2_args *ap = v; 5921 1.2 haad vnode_t *vp = ap->a_vp; 5922 1.2 haad znode_t *zp = VTOZ(vp); 5923 1.97 yamt zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5924 1.97 yamt int error; 5925 1.97 yamt 5926 1.97 yamt rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5927 1.97 yamt if (zp->z_sa_hdl == NULL) { 5928 1.97 yamt /* 5929 1.97 yamt * The fs has been unmounted, or we did a 5930 1.97 yamt * suspend/resume and this file no longer exists. 5931 1.97 yamt */ 5932 1.97 yamt rw_exit(&zfsvfs->z_teardown_inactive_lock); 5933 1.97 yamt *ap->a_recycle = true; 5934 1.97 yamt return (0); 5935 1.97 yamt } 5936 1.97 yamt 5937 1.97 yamt if (zp->z_unlinked) { 5938 1.97 yamt /* 5939 1.97 yamt * Fast path to recycle a vnode of a removed file. 
5940 1.97 yamt */ 5941 1.97 yamt rw_exit(&zfsvfs->z_teardown_inactive_lock); 5942 1.97 yamt *ap->a_recycle = true; 5943 1.97 yamt return (0); 5944 1.97 yamt } 5945 1.97 yamt 5946 1.97 yamt if (zp->z_atime_dirty && zp->z_unlinked == 0) { 5947 1.97 yamt dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 5948 1.1 haad 5949 1.97 yamt dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5950 1.97 yamt zfs_sa_upgrade_txholds(tx, zp); 5951 1.97 yamt error = dmu_tx_assign(tx, TXG_WAIT); 5952 1.97 yamt if (error) { 5953 1.97 yamt dmu_tx_abort(tx); 5954 1.97 yamt } else { 5955 1.97 yamt (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 5956 1.97 yamt (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 5957 1.97 yamt zp->z_atime_dirty = 0; 5958 1.97 yamt dmu_tx_commit(tx); 5959 1.97 yamt } 5960 1.97 yamt } 5961 1.97 yamt rw_exit(&zfsvfs->z_teardown_inactive_lock); 5962 1.24 riastrad 5963 1.97 yamt *ap->a_recycle = false; 5964 1.1 haad return (0); 5965 1.1 haad } 5966 1.1 haad
/*
 * zfs_netbsd_reclaim: reclaim-vnode-operation glue (vop_reclaim_v2_args).
 *
 * Unlocks vp first, then asserts that the vnode has no cached data.  With
 * z_teardown_inactive_lock held as reader, the znode is either freed
 * directly with zfs_znode_free() (when z_sa_hdl is NULL) or handed to
 * zfs_zinactive().
 *
 * NOTE(review): the local 'error' is declared but never used here.
 *
 * Always returns 0.
 */
5967 1.1 haad static int 5968 1.9 christos zfs_netbsd_reclaim(void *v) 5969 1.1 haad { 5970 1.26 riastrad struct vop_reclaim_v2_args /* { 5971 1.12 riastrad struct vnode *a_vp; 5972 1.12 riastrad } */ *ap = v; 5973 1.12 riastrad struct vnode *vp = ap->a_vp; 5974 1.12 riastrad znode_t *zp; 5975 1.2 haad zfsvfs_t *zfsvfs; 5976 1.12 riastrad int error; 5977 1.1 haad 5978 1.27 chs VOP_UNLOCK(vp, 0); 5979 1.12 riastrad zp = VTOZ(vp); 5980 1.2 haad zfsvfs = zp->z_zfsvfs; 5981 1.1 haad 5982 1.27 chs KASSERTMSG(!vn_has_cached_data(vp), "vp %p", vp); 5983 1.3 haad 5984 1.12 riastrad rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5985 1.27 chs if (zp->z_sa_hdl == NULL) 5986 1.27 chs zfs_znode_free(zp); 5987 1.27 chs else 5988 1.27 chs zfs_zinactive(zp); 5989 1.12 riastrad rw_exit(&zfsvfs->z_teardown_inactive_lock); 5990 1.12 riastrad return 0; 5991 1.2 haad } 5992 1.1 haad
/*
 * zfs_netbsd_fid: thin wrapper that unpacks vop_fid_args and forwards the
 * vnode and the fid buffer to zfs_fid(), returning its result unchanged.
 */
5993 1.2 haad static int 5994 1.9 christos zfs_netbsd_fid(void *v) 5995 1.2 haad { 5996 1.9 christos struct vop_fid_args *ap = v; 5997 1.1 haad 5998 
1.2 haad return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5999 1.1 haad } 6000 1.1 haad
/*
 * zfs_netbsd_pathconf: pathconf-vnode-operation glue (vop_pathconf_args).
 *
 * Asks zfs_pathconf() first, using the current thread's credentials.  On
 * success the value is stored through a_retval and 0 is returned.
 *
 * When zfs_pathconf() answers EOPNOTSUPP, a fixed set of names is served
 * from system constants instead: NAME_MAX, PATH_MAX, LINK_MAX, MAX_CANON,
 * MAX_INPUT, PIPE_BUF and _POSIX_VDISABLE, with _PC_CHOWN_RESTRICTED and
 * _PC_NO_TRUNC both reported as 1.  Any other name yields EINVAL.
 *
 * Every other error from zfs_pathconf() is returned unchanged.
 */
6001 1.1 haad static int 6002 1.9 christos zfs_netbsd_pathconf(void *v) 6003 1.1 haad { 6004 1.9 christos struct vop_pathconf_args *ap = v; 6005 1.2 haad ulong_t val; 6006 1.1 haad int error; 6007 1.1 haad 6008 1.2 haad error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->l_cred, NULL); 6009 1.2 haad if (error == 0) 6010 1.2 haad *ap->a_retval = val; 6011 1.2 haad else if (error == EOPNOTSUPP) { 6012 1.2 haad switch (ap->a_name) { 6013 1.2 haad case _PC_NAME_MAX: 6014 1.2 haad *ap->a_retval = NAME_MAX; 6015 1.2 haad return (0); 6016 1.2 haad case _PC_PATH_MAX: 6017 1.2 haad *ap->a_retval = PATH_MAX; 6018 1.2 haad return (0); 6019 1.2 haad case _PC_LINK_MAX: 6020 1.2 haad *ap->a_retval = LINK_MAX; 6021 1.2 haad return (0); 6022 1.2 haad case _PC_MAX_CANON: 6023 1.2 haad *ap->a_retval = MAX_CANON; 6024 1.2 haad return (0); 6025 1.2 haad case _PC_MAX_INPUT: 6026 1.2 haad *ap->a_retval = MAX_INPUT; 6027 1.2 haad return (0); 6028 1.2 haad case _PC_PIPE_BUF: 6029 1.2 haad *ap->a_retval = PIPE_BUF; 6030 1.2 haad return (0); 6031 1.2 haad case _PC_CHOWN_RESTRICTED: 6032 1.2 haad *ap->a_retval = 1; 6033 1.2 haad return (0); 6034 1.13 riastrad case _PC_NO_TRUNC: 6035 1.13 riastrad *ap->a_retval = 1; 6036 1.13 riastrad return (0); 6037 1.2 haad case _PC_VDISABLE: 6038 1.2 haad *ap->a_retval = _POSIX_VDISABLE; 6039 1.2 haad return (0); 6040 1.2 haad default: 6041 1.2 haad return (EINVAL); 6042 1.2 haad } 6043 1.2 haad /* NOTREACHED */ 6044 1.12 riastrad } 6045 1.1 haad return (error); 6046 1.1 haad } 6047 1.1 haad
/*
 * zfs_netbsd_advlock: advisory-lock vnode-operation glue
 * (vop_advlock_args).
 *
 * Brackets the call with ZFS_ENTER and ZFS_EXIT, verifies the znode with
 * ZFS_VERIFY_ZP, and delegates to lf_advlock() using the znode's z_lockf
 * list head and its current z_size.  Returns lf_advlock()'s result.
 */
6048 1.20 riastrad static int 6049 1.20 riastrad zfs_netbsd_advlock(void *v) 6050 1.20 riastrad { 6051 1.20 riastrad struct vop_advlock_args /* { 6052 1.20 riastrad struct vnode *a_vp; 6053 1.20 riastrad void *a_id; 6054 1.20 riastrad int a_op; 6055 1.20 riastrad struct flock *a_fl; 6056 1.20 riastrad int a_flags; 6057 1.20 riastrad } */ *ap = v; 6058 1.20 riastrad 
struct vnode *vp; 6059 1.20 riastrad struct znode *zp; 6060 1.20 riastrad struct zfsvfs *zfsvfs; 6061 1.20 riastrad int error; 6062 1.20 riastrad 6063 1.20 riastrad vp = ap->a_vp; 6064 1.20 riastrad zp = VTOZ(vp); 6065 1.20 riastrad zfsvfs = zp->z_zfsvfs; 6066 1.20 riastrad 6067 1.20 riastrad ZFS_ENTER(zfsvfs); 6068 1.20 riastrad ZFS_VERIFY_ZP(zp); 6069 1.27 chs error = lf_advlock(ap, &zp->z_lockf, zp->z_size); 6070 1.20 riastrad ZFS_EXIT(zfsvfs); 6071 1.20 riastrad 6072 1.20 riastrad return error; 6073 1.20 riastrad } 6074 1.20 riastrad
/*
 * zfs_netbsd_getpages: getpages-vnode-operation glue (vop_getpages_args).
 *
 * PGO_LOCKED path: look up already-resident pages with uvn_findpages()
 * using UFP_NOWAIT, UFP_NOALLOC and UFP_NOBUSY (plus UFP_NORDONLY for
 * write access).  For write access, every page found that is currently
 * clean is moved to UVM_PAGE_STATUS_UNKNOWN.  Returns EBUSY when the centre
 * page was not found, otherwise 0.  The object lock is not released on
 * this path.
 *
 * Unlocked path: the object lock is dropped first.  Requests without
 * PGO_SYNCIO return 0 at once.  Otherwise the function enters an fstrans
 * section (returning ENOENT if v_mount changed meanwhile), does ZFS_ENTER
 * and ZFS_VERIFY_ZP, re-takes the lock as writer, and rejects ranges that
 * extend past round_page(v_size) with EINVAL.  Pages are then obtained with
 * UFP_ALL; each PG_FAKE page is filled by dmu_read() with the lock dropped
 * around the read, after which the lock is re-taken and PG_FAKE cleared.
 * Clean pages are moved to UVM_PAGE_STATUS_UNKNOWN for write access.
 *
 * NOTE(review): when dmu_read() fails, the loop breaks before the lock is
 * re-taken, yet the rw_exit(rw) after the loop still runs.  Unless
 * uvm_aio_aiodone_pages() returns with the lock held, that rw_exit()
 * releases a lock this function does not own -- confirm and fix.
 */
6075 1.27 chs static int 6076 1.2 haad zfs_netbsd_getpages(void *v) 6077 1.2 haad { 6078 1.27 chs struct vop_getpages_args /* { 6079 1.27 chs struct vnode *a_vp; 6080 1.27 chs voff_t a_offset; 6081 1.27 chs struct vm_page **a_m; 6082 1.27 chs int *a_count; 6083 1.27 chs int a_centeridx; 6084 1.27 chs vm_prot_t a_access_type; 6085 1.27 chs int a_advice; 6086 1.27 chs int a_flags; 6087 1.27 chs } */ * const ap = v; 6088 1.27 chs 6089 1.27 chs vnode_t *const vp = ap->a_vp; 6090 1.27 chs const int flags = ap->a_flags; 6091 1.27 chs const bool async = (flags & PGO_SYNCIO) == 0; 6092 1.27 chs const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0; 6093 1.27 chs 6094 1.27 chs struct uvm_object * const uobj = &vp->v_uobj; 6095 1.62 ad krwlock_t * const rw = uobj->vmobjlock; 6096 1.27 chs znode_t *zp = VTOZ(vp); 6097 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6098 1.50 hannken vfs_t *mp; 6099 1.27 chs struct vm_page *pg; 6100 1.27 chs caddr_t va; 6101 1.69 ad int npages = *ap->a_count, found, err = 0; 6102 1.27 chs 6103 1.27 chs if (flags & PGO_LOCKED) { 6104 1.69 ad uvn_findpages(uobj, ap->a_offset, &npages, ap->a_m, NULL, 6105 1.68 ad UFP_NOWAIT | UFP_NOALLOC | UFP_NOBUSY | 6106 1.68 ad (memwrite ? 
UFP_NORDONLY : 0)); 6107 1.69 ad KASSERT(npages == *ap->a_count); 6108 1.68 ad if (memwrite) { 6109 1.68 ad KASSERT(rw_write_held(uobj->vmobjlock)); 6110 1.68 ad for (int i = 0; i < npages; i++) { 6111 1.68 ad pg = ap->a_m[i]; 6112 1.68 ad if (pg == NULL || pg == PGO_DONTCARE) { 6113 1.68 ad continue; 6114 1.68 ad } 6115 1.68 ad if (uvm_pagegetdirty(pg) == 6116 1.68 ad UVM_PAGE_STATUS_CLEAN) { 6117 1.68 ad uvm_pagemarkdirty(pg, 6118 1.68 ad UVM_PAGE_STATUS_UNKNOWN); 6119 1.68 ad } 6120 1.68 ad } 6121 1.68 ad } 6122 1.68 ad return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0; 6123 1.27 chs } 6124 1.62 ad rw_exit(rw); 6125 1.27 chs 6126 1.27 chs if (async) { 6127 1.27 chs return 0; 6128 1.27 chs } 6129 1.27 chs 6130 1.50 hannken mp = vp->v_mount; 6131 1.50 hannken fstrans_start(mp); 6132 1.50 hannken if (vp->v_mount != mp) { 6133 1.50 hannken fstrans_done(mp); 6134 1.50 hannken return ENOENT; 6135 1.50 hannken } 6136 1.27 chs ZFS_ENTER(zfsvfs); 6137 1.27 chs ZFS_VERIFY_ZP(zp); 6138 1.27 chs 6139 1.62 ad rw_enter(rw, RW_WRITER); 6140 1.69 ad if (ap->a_offset + (npages << PAGE_SHIFT) > round_page(vp->v_size)) { 6141 1.62 ad rw_exit(rw); 6142 1.50 hannken ZFS_EXIT(zfsvfs); 6143 1.50 hannken fstrans_done(mp); 6144 1.50 hannken return EINVAL; 6145 1.50 hannken } 6146 1.69 ad uvn_findpages(uobj, ap->a_offset, &npages, ap->a_m, NULL, UFP_ALL); 6147 1.69 ad KASSERT(npages == *ap->a_count); 6148 1.27 chs 6149 1.68 ad for (int i = 0; i < npages; i++) { 6150 1.68 ad pg = ap->a_m[i]; 6151 1.68 ad if (pg->flags & PG_FAKE) { 6152 1.69 ad voff_t offset = pg->offset; 6153 1.69 ad KASSERT(pg->offset == ap->a_offset + (i << PAGE_SHIFT)); 6154 1.68 ad rw_exit(rw); 6155 1.27 chs 6156 1.68 ad va = zfs_map_page(pg, S_WRITE); 6157 1.68 ad err = dmu_read(zfsvfs->z_os, zp->z_id, offset, 6158 1.68 ad PAGE_SIZE, va, DMU_READ_PREFETCH); 6159 1.68 ad zfs_unmap_page(pg, va); 6160 1.27 chs 6161 1.68 ad if (err != 0) { 6162 1.71 chs uvm_aio_aiodone_pages(ap->a_m, npages, false, err); 6163 1.68 ad 
memset(ap->a_m, 0, sizeof(ap->a_m[0]) * 6164 1.68 ad npages);
/*
 * NOTE(review): rw was dropped before the read and has not been re-taken
 * on this path, but the rw_exit(rw) following the loop still executes
 * after this break.  Unless uvm_aio_aiodone_pages() returns with rw held,
 * that is an unlock of a lock not owned here -- confirm.
 */
6165 1.68 ad break; 6166 1.68 ad } 6167 1.71 chs rw_enter(rw, RW_WRITER); 6168 1.68 ad pg->flags &= ~(PG_FAKE); 6169 1.68 ad } 6170 1.27 chs 6171 1.68 ad if (memwrite && uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) { 6172 1.68 ad /* For write faults, start dirtiness tracking. */ 6173 1.68 ad uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); 6174 1.68 ad } 6175 1.27 chs } 6176 1.62 ad rw_exit(rw); 6177 1.27 chs 6178 1.27 chs ZFS_EXIT(zfsvfs); 6179 1.50 hannken fstrans_done(mp); 6180 1.27 chs 6181 1.27 chs return (err); 6182 1.27 chs } 6183 1.27 chs
/*
 * zfs_putapage: write 'count' pages, starting at pp[0]->offset, to the
 * object backing vp.
 *
 * Early exits, all of which still go through the 'out' label:
 *   - z_sa_hdl == NULL: nothing is written, err = 0.
 *   - caller is the page daemon: err = ENOMEM.
 *   - owner over quota (checked for both B_FALSE and B_TRUE): err = EDQUOT.
 *   - dmu_tx_assign() failure: the transaction is aborted.
 *
 * Otherwise the length is clamped so that off + len does not exceed
 * z_size, and the data is written inside a DMU transaction.  When
 * z_blksz <= PAGESIZE exactly one page is expected (asserted) and is
 * written through a temporary mapping with dmu_write(); larger block sizes
 * use dmu_write_pages().  The bool reached through tsd_get(zfs_putpage_key)
 * is set to true after the write attempt.  If the write succeeded, mtime,
 * ctime and z_pflags are updated with sa_bulk_update() and a TX_WRITE
 * record is logged.  The transaction is then committed.
 *
 * NOTE(review): the success branch declares a local 'count' that shadows
 * the parameter; the call at 'out' uses the parameter.
 *
 * The 'out' label always calls uvm_aio_aiodone_pages(pp, count, true, err)
 * before returning err.
 */
6184 1.27 chs static int 6185 1.27 chs zfs_putapage(vnode_t *vp, page_t **pp, int count, int flags) 6186 1.27 chs { 6187 1.27 chs znode_t *zp = VTOZ(vp); 6188 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6189 1.27 chs dmu_tx_t *tx; 6190 1.27 chs voff_t off, koff; 6191 1.27 chs voff_t len, klen; 6192 1.27 chs int err; 6193 1.27 chs 6194 1.27 chs bool *cleanedp; 6195 1.27 chs struct uvm_object *uobj = &vp->v_uobj; 6196 1.62 ad krwlock_t *rw = uobj->vmobjlock; 6197 1.27 chs 6198 1.42 hannken if (zp->z_sa_hdl == NULL) { 6199 1.42 hannken err = 0; 6200 1.71 chs goto out; 6201 1.42 hannken } 6202 1.42 hannken 6203 1.66 chs /* 6204 1.95 yamt * writing to zfs needs memory allocation, locks, etc, 6205 1.95 yamt * which are not safe for the page daemon. 6206 1.95 yamt * ENOMEM to signal a transient error to uvm. 6207 1.95 yamt * hopefully it can find other pages to free. 6208 1.95 yamt */ 6209 1.95 yamt 6210 1.100 kre if (uvm_lwp_is_pagedaemon(curlwp)) { 6211 1.95 yamt err = SET_ERROR(ENOMEM); 6212 1.95 yamt goto out; 6213 1.95 yamt } 6214 1.95 yamt 6215 1.95 yamt /* 6216 1.66 chs * Calculate the length and assert that no whole pages are past EOF. 6217 1.66 chs * This check is equivalent to "off + len <= round_page(zp->z_size)", 6218 1.66 chs * with gyrations to avoid signed integer overflow. 
6219 1.66 chs */ 6220 1.66 chs 6221 1.27 chs off = pp[0]->offset; 6222 1.27 chs len = count * PAGESIZE; 6223 1.66 chs KASSERT(off <= zp->z_size); 6224 1.66 chs KASSERT(len <= round_page(zp->z_size)); 6225 1.66 chs KASSERT(off <= round_page(zp->z_size) - len); 6226 1.66 chs 6227 1.66 chs /* 6228 1.66 chs * If EOF is within the last page, reduce len to avoid writing past 6229 1.66 chs * the file size in the ZFS buffer. Assert that 6230 1.66 chs * "off + len <= zp->z_size", again avoiding signed integer overflow. 6231 1.66 chs */ 6232 1.66 chs 6233 1.66 chs if (len > zp->z_size - off) { 6234 1.66 chs len = zp->z_size - off; 6235 1.66 chs } 6236 1.66 chs KASSERT(len <= zp->z_size); 6237 1.66 chs KASSERT(off <= zp->z_size - len); 6238 1.27 chs 6239 1.27 chs if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 6240 1.27 chs zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 6241 1.27 chs err = SET_ERROR(EDQUOT); 6242 1.27 chs goto out; 6243 1.27 chs } 6244 1.27 chs tx = dmu_tx_create(zfsvfs->z_os); 6245 1.27 chs dmu_tx_hold_write(tx, zp->z_id, off, len); 6246 1.27 chs 6247 1.27 chs dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 6248 1.27 chs zfs_sa_upgrade_txholds(tx, zp); 6249 1.27 chs err = dmu_tx_assign(tx, TXG_WAIT); 6250 1.27 chs if (err != 0) { 6251 1.27 chs dmu_tx_abort(tx); 6252 1.27 chs goto out; 6253 1.27 chs } 6254 1.27 chs 6255 1.27 chs if (zp->z_blksz <= PAGESIZE) { 6256 1.27 chs KASSERTMSG(count == 1, "vp %p pp %p count %d", vp, pp, count); 6257 1.27 chs caddr_t va = zfs_map_page(*pp, S_READ); 6258 1.27 chs ASSERT3U(len, <=, PAGESIZE); 6259 1.27 chs dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 6260 1.27 chs zfs_unmap_page(*pp, va); 6261 1.27 chs } else { 6262 1.27 chs err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 6263 1.27 chs } 6264 1.27 chs cleanedp = tsd_get(zfs_putpage_key); 6265 1.27 chs *cleanedp = true; 6266 1.27 chs 6267 1.27 chs if (err == 0) { 6268 1.27 chs uint64_t mtime[2], ctime[2]; 6269 1.27 chs sa_bulk_attr_t bulk[3]; 6270 1.27 chs 
int count = 0; 6271 1.27 chs 6272 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 6273 1.27 chs &mtime, 16); 6274 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 6275 1.27 chs &ctime, 16); 6276 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6277 1.27 chs &zp->z_pflags, 8); 6278 1.27 chs zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 6279 1.27 chs B_TRUE); 6280 1.27 chs err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 6281 1.27 chs ASSERT0(err); 6282 1.99 yamt zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 6283 1.99 yamt B_FALSE); 6284 1.27 chs } 6285 1.27 chs dmu_tx_commit(tx); 6286 1.27 chs 6287 1.27 chs out: 6288 1.71 chs uvm_aio_aiodone_pages(pp, count, true, err); 6289 1.27 chs return (err); 6290 1.27 chs } 6291 1.27 chs 6292 1.27 chs static void 6293 1.96 yamt zfs_netbsd_update_mctime(vnode_t *vp) 6294 1.27 chs { 6295 1.27 chs znode_t *zp = VTOZ(vp); 6296 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6297 1.27 chs dmu_tx_t *tx; 6298 1.27 chs sa_bulk_attr_t bulk[2]; 6299 1.27 chs uint64_t mtime[2], ctime[2]; 6300 1.27 chs int count = 0, err; 6301 1.2 haad 6302 1.27 chs tx = dmu_tx_create(zfsvfs->z_os); 6303 1.89 yamt dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 6304 1.89 yamt zfs_sa_upgrade_txholds(tx, zp); 6305 1.27 chs err = dmu_tx_assign(tx, TXG_WAIT); 6306 1.27 chs if (err != 0) { 6307 1.27 chs dmu_tx_abort(tx); 6308 1.27 chs return; 6309 1.27 chs } 6310 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6311 1.27 chs SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6312 1.27 chs zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 6313 1.89 yamt err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 6314 1.27 chs dmu_tx_commit(tx); 6315 1.89 yamt if (err != 0) { 6316 1.89 yamt printf("%s: sa_bulk_update failed with %d\n", __func__, err); 6317 1.89 yamt } 6318 1.1 haad } 6319 1.1 haad 6320 1.27 chs static int 6321 
1.2 haad zfs_netbsd_putpages(void *v) 6322 1.1 haad { 6323 1.27 chs struct vop_putpages_args /* { 6324 1.27 chs struct vnode *a_vp; 6325 1.27 chs voff_t a_offlo; 6326 1.27 chs voff_t a_offhi; 6327 1.27 chs int a_flags; 6328 1.27 chs } */ * const ap = v; 6329 1.27 chs 6330 1.27 chs struct vnode *vp = ap->a_vp; 6331 1.27 chs voff_t offlo = ap->a_offlo; 6332 1.27 chs voff_t offhi = ap->a_offhi; 6333 1.27 chs int flags = ap->a_flags; 6334 1.27 chs 6335 1.2 haad znode_t *zp = VTOZ(vp); 6336 1.27 chs zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6337 1.27 chs rl_t *rl = NULL; 6338 1.38 hannken uint64_t len; 6339 1.2 haad int error; 6340 1.27 chs bool cleaned = false; 6341 1.27 chs bool cleaning = (flags & PGO_CLEANIT) != 0; 6342 1.27 chs 6343 1.27 chs if (cleaning) { 6344 1.100 kre bool pagedaemon = uvm_lwp_is_pagedaemon(curlwp); 6345 1.82 yamt 6346 1.38 hannken ASSERT((offlo & PAGE_MASK) == 0 && (offhi & PAGE_MASK) == 0); 6347 1.38 hannken ASSERT(offlo < offhi || offhi == 0); 6348 1.38 hannken if (offhi == 0) 6349 1.38 hannken len = UINT64_MAX; 6350 1.38 hannken else 6351 1.38 hannken len = offhi - offlo; 6352 1.62 ad rw_exit(vp->v_uobj.vmobjlock); 6353 1.82 yamt if (pagedaemon) { 6354 1.41 hannken error = fstrans_start_nowait(vp->v_mount); 6355 1.41 hannken if (error) 6356 1.41 hannken return error; 6357 1.41 hannken } else { 6358 1.41 hannken vfs_t *mp = vp->v_mount; 6359 1.41 hannken fstrans_start(mp); 6360 1.41 hannken if (vp->v_mount != mp) { 6361 1.41 hannken fstrans_done(mp); 6362 1.41 hannken ASSERT(!vn_has_cached_data(vp)); 6363 1.41 hannken return 0; 6364 1.41 hannken } 6365 1.41 hannken } 6366 1.42 hannken /* 6367 1.42 hannken * Cannot use ZFS_ENTER() here as it returns with error 6368 1.42 hannken * if z_unmounted. The next statement is equivalent. 
6369 1.42 hannken */ 6370 1.42 hannken rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 6371 1.42 hannken 6372 1.82 yamt if (pagedaemon) { 6373 1.82 yamt rl = zfs_range_lock_try(zp, offlo, len, RL_WRITER); 6374 1.82 yamt if (rl == NULL) { 6375 1.82 yamt error = EBUSY; 6376 1.82 yamt goto fail; 6377 1.82 yamt } 6378 1.82 yamt } else { 6379 1.82 yamt rl = zfs_range_lock(zp, offlo, len, RL_WRITER); 6380 1.82 yamt } 6381 1.62 ad rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 6382 1.27 chs tsd_set(zfs_putpage_key, &cleaned); 6383 1.27 chs } 6384 1.2 haad error = genfs_putpages(v); 6385 1.41 hannken if (cleaning) { 6386 1.27 chs tsd_set(zfs_putpage_key, NULL); 6387 1.27 chs zfs_range_unlock(rl); 6388 1.27 chs 6389 1.41 hannken /* 6390 1.41 hannken * Only zil_commit() if we cleaned something. This avoids 6391 1.41 hannken * deadlock if we're called from zfs_netbsd_setsize(). 6392 1.92 yamt * 6393 1.92 yamt * Also, it isn't safe or nessesary to call it for vnode 6394 1.92 yamt * reclaim. See the comment in zfs_netbsd_fsync. 6395 1.41 hannken */ 6396 1.27 chs 6397 1.92 yamt if (cleaned && (flags & PGO_RECLAIM) == 0) { 6398 1.92 yamt if ((flags & PGO_SYNCIO) != 0 6399 1.92 yamt || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 6400 1.92 yamt zil_commit(zfsvfs->z_log, zp->z_id); 6401 1.92 yamt } 6402 1.82 yamt fail: 6403 1.42 hannken ZFS_EXIT(zfsvfs); 6404 1.41 hannken fstrans_done(vp->v_mount); 6405 1.41 hannken } 6406 1.2 haad return error; 6407 1.2 haad } 6408 1.2 haad 6409 1.27 chs /* 6410 1.27 chs * Restrict the putpages range to the ZFS block containing the offset. 
6411 1.27 chs */ 6412 1.27 chs static void 6413 1.27 chs zfs_netbsd_gop_putrange(struct vnode *vp, off_t off, off_t *lop, off_t *hip) 6414 1.27 chs { 6415 1.27 chs znode_t *zp = VTOZ(vp); 6416 1.27 chs 6417 1.27 chs *lop = trunc_page(rounddown2(off, zp->z_blksz)); 6418 1.27 chs *hip = round_page(*lop + zp->z_blksz); 6419 1.27 chs } 6420 1.27 chs 6421 1.27 chs void 6422 1.27 chs zfs_netbsd_setsize(vnode_t *vp, off_t size) 6423 1.27 chs { 6424 1.27 chs struct uvm_object *uobj = &vp->v_uobj; 6425 1.62 ad krwlock_t *rw = uobj->vmobjlock; 6426 1.27 chs page_t *pg; 6427 1.27 chs int count, pgoff; 6428 1.27 chs caddr_t va; 6429 1.27 chs off_t tsize; 6430 1.27 chs 6431 1.27 chs uvm_vnp_setsize(vp, size); 6432 1.27 chs if (!vn_has_cached_data(vp)) 6433 1.27 chs return; 6434 1.27 chs 6435 1.27 chs tsize = trunc_page(size); 6436 1.27 chs if (tsize == size) 6437 1.27 chs return; 6438 1.27 chs 6439 1.27 chs /* 6440 1.27 chs * If there's a partial page, we need to zero the tail. 6441 1.27 chs */ 6442 1.27 chs 6443 1.62 ad rw_enter(rw, RW_WRITER); 6444 1.27 chs count = 1; 6445 1.27 chs pg = NULL; 6446 1.55 ad if (uvn_findpages(uobj, tsize, &count, &pg, NULL, UFP_NOALLOC)) { 6447 1.27 chs va = zfs_map_page(pg, S_WRITE); 6448 1.27 chs pgoff = size - tsize; 6449 1.27 chs memset(va + pgoff, 0, PAGESIZE - pgoff); 6450 1.27 chs zfs_unmap_page(pg, va); 6451 1.27 chs uvm_page_unbusy(&pg, 1); 6452 1.27 chs } 6453 1.27 chs 6454 1.62 ad rw_exit(rw); 6455 1.27 chs } 6456 1.27 chs 6457 1.27 chs static int 6458 1.27 chs zfs_netbsd_print(void *v) 6459 1.27 chs { 6460 1.27 chs struct vop_print_args /* { 6461 1.27 chs struct vnode *a_vp; 6462 1.27 chs } */ *ap = v; 6463 1.27 chs vnode_t *vp; 6464 1.27 chs znode_t *zp; 6465 1.27 chs 6466 1.27 chs vp = ap->a_vp; 6467 1.27 chs zp = VTOZ(vp); 6468 1.27 chs 6469 1.27 chs printf("\tino %" PRIu64 " size %" PRIu64 "\n", 6470 1.27 chs zp->z_id, zp->z_size); 6471 1.27 chs return 0; 6472 1.27 chs } 6473 1.27 chs 6474 1.27 chs const struct genfs_ops 
zfs_genfsops = { 6475 1.88 yamt .gop_write = zfs_putapage, 6476 1.27 chs .gop_putrange = zfs_netbsd_gop_putrange, 6477 1.27 chs }; 6478 1.27 chs 6479 1.2 haad int (**zfs_vnodeop_p)(void *); 6480 1.2 haad const struct vnodeopv_entry_desc zfs_vnodeop_entries[] = { 6481 1.2 haad { &vop_default_desc, vn_default_error }, 6482 1.72 dholland { &vop_parsepath_desc, genfs_parsepath }, 6483 1.2 haad { &vop_lookup_desc, zfs_netbsd_lookup }, 6484 1.2 haad { &vop_create_desc, zfs_netbsd_create }, 6485 1.48 hannken { &vop_mknod_desc, zfs_netbsd_mknod }, 6486 1.2 haad { &vop_open_desc, zfs_netbsd_open }, 6487 1.2 haad { &vop_close_desc, zfs_netbsd_close }, 6488 1.2 haad { &vop_access_desc, zfs_netbsd_access }, 6489 1.67 christos { &vop_accessx_desc, genfs_accessx }, 6490 1.2 haad { &vop_getattr_desc, zfs_netbsd_getattr }, 6491 1.2 haad { &vop_setattr_desc, zfs_netbsd_setattr }, 6492 1.2 haad { &vop_read_desc, zfs_netbsd_read }, 6493 1.2 haad { &vop_write_desc, zfs_netbsd_write }, 6494 1.2 haad { &vop_ioctl_desc, zfs_netbsd_ioctl }, 6495 1.51 hannken { &vop_poll_desc, genfs_poll }, 6496 1.52 hannken { &vop_kqfilter_desc, genfs_kqfilter }, 6497 1.58 riastrad { &vop_revoke_desc, genfs_revoke }, 6498 1.2 haad { &vop_fsync_desc, zfs_netbsd_fsync }, 6499 1.2 haad { &vop_remove_desc, zfs_netbsd_remove }, 6500 1.2 haad { &vop_link_desc, zfs_netbsd_link }, 6501 1.74 dholland { &vop_lock_desc, genfs_lock }, 6502 1.74 dholland { &vop_unlock_desc, genfs_unlock }, 6503 1.2 haad { &vop_rename_desc, zfs_netbsd_rename }, 6504 1.2 haad { &vop_mkdir_desc, zfs_netbsd_mkdir }, 6505 1.2 haad { &vop_rmdir_desc, zfs_netbsd_rmdir }, 6506 1.2 haad { &vop_symlink_desc, zfs_netbsd_symlink }, 6507 1.2 haad { &vop_readdir_desc, zfs_netbsd_readdir }, 6508 1.2 haad { &vop_readlink_desc, zfs_netbsd_readlink }, 6509 1.2 haad { &vop_inactive_desc, zfs_netbsd_inactive }, 6510 1.2 haad { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6511 1.2 haad { &vop_pathconf_desc, zfs_netbsd_pathconf }, 6512 1.74 dholland { 
&vop_seek_desc, genfs_seek }, 6513 1.2 haad { &vop_getpages_desc, zfs_netbsd_getpages }, 6514 1.2 haad { &vop_putpages_desc, zfs_netbsd_putpages }, 6515 1.74 dholland { &vop_mmap_desc, genfs_mmap }, 6516 1.74 dholland { &vop_islocked_desc, genfs_islocked }, 6517 1.20 riastrad { &vop_advlock_desc, zfs_netbsd_advlock }, 6518 1.27 chs { &vop_print_desc, zfs_netbsd_print }, 6519 1.74 dholland { &vop_fcntl_desc, genfs_fcntl }, 6520 1.2 haad { NULL, NULL } 6521 1.1 haad }; 6522 1.1 haad 6523 1.2 haad const struct vnodeopv_desc zfs_vnodeop_opv_desc = 6524 1.2 haad { &zfs_vnodeop_p, zfs_vnodeop_entries }; 6525 1.27 chs 6526 1.48 hannken int (**zfs_specop_p)(void *); 6527 1.48 hannken const struct vnodeopv_entry_desc zfs_specop_entries[] = { 6528 1.48 hannken { &vop_default_desc, vn_default_error }, 6529 1.73 dholland GENFS_SPECOP_ENTRIES, 6530 1.48 hannken { &vop_close_desc, spec_close }, 6531 1.48 hannken { &vop_access_desc, zfs_netbsd_access }, 6532 1.67 christos { &vop_accessx_desc, genfs_accessx }, 6533 1.48 hannken { &vop_getattr_desc, zfs_netbsd_getattr }, 6534 1.48 hannken { &vop_setattr_desc, zfs_netbsd_setattr }, 6535 1.90 yamt { &vop_read_desc, zfs_netbsd_read }, 6536 1.90 yamt { &vop_write_desc, zfs_netbsd_write }, 6537 1.61 riastrad { &vop_fsync_desc, zfs_spec_fsync }, 6538 1.74 dholland { &vop_lock_desc, genfs_lock }, 6539 1.74 dholland { &vop_unlock_desc, genfs_unlock }, 6540 1.48 hannken { &vop_inactive_desc, zfs_netbsd_inactive }, 6541 1.48 hannken { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6542 1.74 dholland { &vop_islocked_desc, genfs_islocked }, 6543 1.74 dholland { &vop_bwrite_desc, vn_bwrite }, 6544 1.48 hannken { &vop_print_desc, zfs_netbsd_print }, 6545 1.74 dholland { &vop_fcntl_desc, genfs_fcntl }, 6546 1.48 hannken { NULL, NULL } 6547 1.48 hannken }; 6548 1.48 hannken 6549 1.48 hannken const struct vnodeopv_desc zfs_specop_opv_desc = 6550 1.48 hannken { &zfs_specop_p, zfs_specop_entries }; 6551 1.48 hannken 6552 1.48 hannken int 
(**zfs_fifoop_p)(void *); 6553 1.48 hannken const struct vnodeopv_entry_desc zfs_fifoop_entries[] = { 6554 1.48 hannken { &vop_default_desc, vn_default_error }, 6555 1.73 dholland GENFS_FIFOOP_ENTRIES, 6556 1.48 hannken { &vop_close_desc, vn_fifo_bypass }, 6557 1.48 hannken { &vop_access_desc, zfs_netbsd_access }, 6558 1.67 christos { &vop_accessx_desc, genfs_accessx }, 6559 1.48 hannken { &vop_getattr_desc, zfs_netbsd_getattr }, 6560 1.48 hannken { &vop_setattr_desc, zfs_netbsd_setattr }, 6561 1.90 yamt { &vop_read_desc, zfs_netbsd_read }, 6562 1.90 yamt { &vop_write_desc, zfs_netbsd_write }, 6563 1.48 hannken { &vop_fsync_desc, zfs_netbsd_fsync }, 6564 1.74 dholland { &vop_lock_desc, genfs_lock }, 6565 1.74 dholland { &vop_unlock_desc, genfs_unlock }, 6566 1.48 hannken { &vop_inactive_desc, zfs_netbsd_inactive }, 6567 1.48 hannken { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6568 1.74 dholland { &vop_islocked_desc, genfs_islocked }, 6569 1.73 dholland { &vop_bwrite_desc, vn_bwrite }, 6570 1.73 dholland { &vop_strategy_desc, vn_fifo_bypass }, 6571 1.48 hannken { &vop_print_desc, zfs_netbsd_print }, 6572 1.74 dholland { &vop_fcntl_desc, genfs_fcntl }, 6573 1.48 hannken { NULL, NULL } 6574 1.48 hannken }; 6575 1.48 hannken 6576 1.48 hannken const struct vnodeopv_desc zfs_fifoop_opv_desc = 6577 1.48 hannken { &zfs_fifoop_p, zfs_fifoop_entries }; 6578 1.48 hannken 6579 1.27 chs #endif /* __NetBSD__ */ 6580