Home | History | Annotate | Line # | Download | only in common
linux_file.c revision 1.125
      1 /*	$NetBSD: linux_file.c,v 1.125 2024/09/28 19:35:56 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden and Eric Haszlakiewicz.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Functions in multiarch:
     34  *	linux_sys_llseek	: linux_llseek.c
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.125 2024/09/28 19:35:56 christos Exp $");
     39 
     40 #include <sys/types.h>
     41 #include <sys/param.h>
     42 #include <sys/systm.h>
     43 #include <sys/namei.h>
     44 #include <sys/proc.h>
     45 #include <sys/file.h>
     46 #include <sys/fcntl.h>
     47 #include <sys/stat.h>
     48 #include <sys/vfs_syscalls.h>
     49 #include <sys/filedesc.h>
     50 #include <sys/ioctl.h>
     51 #include <sys/kernel.h>
     52 #include <sys/mount.h>
     53 #include <sys/namei.h>
     54 #include <sys/vnode.h>
     55 #include <sys/tty.h>
     56 #include <sys/socketvar.h>
     57 #include <sys/conf.h>
     58 #include <sys/pipe.h>
     59 #include <sys/fstrans.h>
     60 #include <sys/syscallargs.h>
     61 #include <sys/vfs_syscalls.h>
     62 
     63 #include <compat/linux/common/linux_types.h>
     64 #include <compat/linux/common/linux_signal.h>
     65 #include <compat/linux/common/linux_fcntl.h>
     66 #include <compat/linux/common/linux_util.h>
     67 #include <compat/linux/common/linux_machdep.h>
     68 #include <compat/linux/common/linux_ipc.h>
     69 #include <compat/linux/common/linux_sem.h>
     70 
     71 #include <compat/linux/linux_syscallargs.h>
     72 
/*
 * Debug tracing: when DEBUG_LINUX is defined, DPRINTF() forwards its
 * arguments to uprintf(); otherwise it expands to nothing.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a, ...)	uprintf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * per-iteration transfer size of a copy_file_range emulation -- confirm.
 */
#define LINUX_COPY_FILE_RANGE_MAX_CHUNK 8192

/* Forward declarations of file-local helpers defined further down. */
static int bsd_to_linux_ioflags(int);
#if !defined(__aarch64__) && !defined(__amd64__)
static void bsd_to_linux_stat(struct stat *, struct linux_stat *);
#endif

/*
 * NOTE(review): conv_linux_flock() is a macro defined outside this file;
 * presumably it emits the flock structure converters that the
 * do_linux_getlk()/do_linux_setlk() macros in linux_sys_fcntl() rely
 * on -- confirm against its definition.
 */
conv_linux_flock(linux, flock)
     87 
     88 /*
     89  * Some file-related calls are handled here. The usual flag conversion
     90  * and structure conversion is done, and alternate emul path searching.
     91  */
     92 
     93 /*
     94  * The next two functions convert between the Linux and NetBSD values
     95  * of the flags used in open(2) and fcntl(2).
     96  */
     97 int
     98 linux_to_bsd_ioflags(int lflags)
     99 {
    100 	int res = 0;
    101 
    102 	res |= cvtto_bsd_mask(lflags, LINUX_O_WRONLY, O_WRONLY);
    103 	res |= cvtto_bsd_mask(lflags, LINUX_O_RDONLY, O_RDONLY);
    104 	res |= cvtto_bsd_mask(lflags, LINUX_O_RDWR, O_RDWR);
    105 
    106 	res |= cvtto_bsd_mask(lflags, LINUX_O_CREAT, O_CREAT);
    107 	res |= cvtto_bsd_mask(lflags, LINUX_O_EXCL, O_EXCL);
    108 	res |= cvtto_bsd_mask(lflags, LINUX_O_NOCTTY, O_NOCTTY);
    109 	res |= cvtto_bsd_mask(lflags, LINUX_O_TRUNC, O_TRUNC);
    110 	res |= cvtto_bsd_mask(lflags, LINUX_O_APPEND, O_APPEND);
    111 	res |= cvtto_bsd_mask(lflags, LINUX_O_NONBLOCK, O_NONBLOCK);
    112 	res |= cvtto_bsd_mask(lflags, LINUX_O_NDELAY, O_NDELAY);
    113 	res |= cvtto_bsd_mask(lflags, LINUX_O_SYNC, O_FSYNC);
    114 	res |= cvtto_bsd_mask(lflags, LINUX_FASYNC, O_ASYNC);
    115 	res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECT, O_DIRECT);
    116 	res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECTORY, O_DIRECTORY);
    117 	res |= cvtto_bsd_mask(lflags, LINUX_O_NOFOLLOW, O_NOFOLLOW);
    118 	res |= cvtto_bsd_mask(lflags, LINUX_O_CLOEXEC, O_CLOEXEC);
    119 
    120 	return res;
    121 }
    122 
    123 static int
    124 bsd_to_linux_ioflags(int bflags)
    125 {
    126 	int res = 0;
    127 
    128 	res |= cvtto_linux_mask(bflags, O_WRONLY, LINUX_O_WRONLY);
    129 	res |= cvtto_linux_mask(bflags, O_RDONLY, LINUX_O_RDONLY);
    130 	res |= cvtto_linux_mask(bflags, O_RDWR, LINUX_O_RDWR);
    131 
    132 	res |= cvtto_linux_mask(bflags, O_CREAT, LINUX_O_CREAT);
    133 	res |= cvtto_linux_mask(bflags, O_EXCL, LINUX_O_EXCL);
    134 	res |= cvtto_linux_mask(bflags, O_NOCTTY, LINUX_O_NOCTTY);
    135 	res |= cvtto_linux_mask(bflags, O_TRUNC, LINUX_O_TRUNC);
    136 	res |= cvtto_linux_mask(bflags, O_APPEND, LINUX_O_APPEND);
    137 	res |= cvtto_linux_mask(bflags, O_NONBLOCK, LINUX_O_NONBLOCK);
    138 	res |= cvtto_linux_mask(bflags, O_NDELAY, LINUX_O_NDELAY);
    139 	res |= cvtto_linux_mask(bflags, O_FSYNC, LINUX_O_SYNC);
    140 	res |= cvtto_linux_mask(bflags, O_ASYNC, LINUX_FASYNC);
    141 	res |= cvtto_linux_mask(bflags, O_DIRECT, LINUX_O_DIRECT);
    142 	res |= cvtto_linux_mask(bflags, O_DIRECTORY, LINUX_O_DIRECTORY);
    143 	res |= cvtto_linux_mask(bflags, O_NOFOLLOW, LINUX_O_NOFOLLOW);
    144 	res |= cvtto_linux_mask(bflags, O_CLOEXEC, LINUX_O_CLOEXEC);
    145 
    146 	return res;
    147 }
    148 
/*
 * Build a file offset from the split "hi"/"lo" arguments of the Linux
 * preadv(2)/pwritev(2) system calls.
 */
static inline off_t
linux_hilo_to_off_t(unsigned long hi, unsigned long lo)
{
#ifndef _LP64
	/* 32-bit longs: "hi" supplies bits 32..63 of the offset. */
	const off_t upper = (off_t)hi;

	return (upper << 32) | lo;
#else
	/*
	 * Linux discards the "hi" portion on LP64 platforms; glibc still
	 * puts the upper 32 bits of the offset into "hi", but "lo" already
	 * holds all the bits in this case.
	 */
	(void)hi;
	return (off_t)lo;
#endif /* !_LP64 */
}
    165 
    166 #if !defined(__aarch64__)
    167 /*
    168  * creat(2) is an obsolete function, but it's present as a Linux
    169  * system call, so let's deal with it.
    170  *
    171  * Note: On the Alpha this doesn't really exist in Linux, but it's defined
    172  * in syscalls.master anyway so this doesn't have to be special cased.
    173  *
    174  * Just call open(2) with the TRUNC, CREAT and WRONLY flags.
    175  */
    176 int
    177 linux_sys_creat(struct lwp *l, const struct linux_sys_creat_args *uap, register_t *retval)
    178 {
    179 	/* {
    180 		syscallarg(const char *) path;
    181 		syscallarg(linux_umode_t) mode;
    182 	} */
    183 	struct sys_open_args oa;
    184 
    185 	SCARG(&oa, path) = SCARG(uap, path);
    186 	SCARG(&oa, flags) = O_CREAT | O_TRUNC | O_WRONLY;
    187 	SCARG(&oa, mode) = SCARG(uap, mode);
    188 
    189 	return sys_open(l, &oa, retval);
    190 }
    191 #endif
    192 
    193 static void
    194 linux_open_ctty(struct lwp *l, int flags, int fd)
    195 {
    196 	struct proc *p = l->l_proc;
    197 
    198 	/*
    199 	 * this bit from sunos_misc.c (and svr4_fcntl.c).
    200 	 * If we are a session leader, and we don't have a controlling
    201 	 * terminal yet, and the O_NOCTTY flag is not set, try to make
    202 	 * this the controlling terminal.
    203 	 */
    204         if (!(flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_lflag & PL_CONTROLT)) {
    205                 file_t *fp;
    206 
    207 		fp = fd_getfile(fd);
    208 
    209                 /* ignore any error, just give it a try */
    210                 if (fp != NULL) {
    211 			if (fp->f_type == DTYPE_VNODE) {
    212 				(fp->f_ops->fo_ioctl) (fp, TIOCSCTTY, NULL);
    213 			}
    214 			fd_putfile(fd);
    215 		}
    216         }
    217 }
    218 
    219 /*
    220  * open(2). Take care of the different flag values, and let the
    221  * NetBSD syscall do the real work. See if this operation
    222  * gives the current process a controlling terminal.
    223  * (XXX is this necessary?)
    224  */
    225 int
    226 linux_sys_open(struct lwp *l, const struct linux_sys_open_args *uap, register_t *retval)
    227 {
    228 	/* {
    229 		syscallarg(const char *) path;
    230 		syscallarg(int) flags;
    231 		syscallarg(linux_umode_t) mode;
    232 	} */
    233 	int error, fl;
    234 	struct sys_open_args boa;
    235 
    236 	fl = linux_to_bsd_ioflags(SCARG(uap, flags));
    237 
    238 	SCARG(&boa, path) = SCARG(uap, path);
    239 	SCARG(&boa, flags) = fl;
    240 	SCARG(&boa, mode) = SCARG(uap, mode);
    241 
    242 	if ((error = sys_open(l, &boa, retval)))
    243 		return (error == EFTYPE) ? ELOOP : error;
    244 
    245 	linux_open_ctty(l, fl, *retval);
    246 	return 0;
    247 }
    248 
    249 int
    250 linux_sys_openat(struct lwp *l, const struct linux_sys_openat_args *uap, register_t *retval)
    251 {
    252 	/* {
    253 		syscallarg(int) fd;
    254 		syscallarg(const char *) path;
    255 		syscallarg(int) flags;
    256 		syscallarg(linux_umode_t) mode;
    257 	} */
    258 	int error, fl;
    259 	struct sys_openat_args boa;
    260 
    261 	fl = linux_to_bsd_ioflags(SCARG(uap, flags));
    262 
    263 	SCARG(&boa, fd) = SCARG(uap, fd);
    264 	SCARG(&boa, path) = SCARG(uap, path);
    265 	SCARG(&boa, oflags) = fl;
    266 	SCARG(&boa, mode) = SCARG(uap, mode);
    267 
    268 	if ((error = sys_openat(l, &boa, retval)))
    269 		return (error == EFTYPE) ? ELOOP : error;
    270 
    271 	linux_open_ctty(l, fl, *retval);
    272 	return 0;
    273 }
    274 
/*
 * fcntl(2) for Linux processes.
 *
 * Most actions in the fcntl() call are straightforward; simply
 * pass control to the NetBSD system call. A few commands need
 * conversions after the actual system call has done its work,
 * because the flag values and lock structure are different.
 *
 * Commands that only need their number translated set `cmd' and break
 * out of the switch to the common sys_fcntl() call at the bottom.
 * LINUX_F_GETFL, LINUX_F_SETFL and the tty path of LINUX_F_{G,S}ETOWN
 * return from inside the switch.  Unrecognized commands fail with
 * EOPNOTSUPP.
 */
int
linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(int) cmd;
		syscallarg(void *) arg;
	} */
	struct proc *p = l->l_proc;
	int fd, cmd, error;
	u_long val;
	void *arg;
	struct sys_fcntl_args fca;
	file_t *fp;
	struct vnode *vp;
	struct vattr va;
	long pgid;
	struct pgrp *pgrp;
	struct tty *tp;

	fd = SCARG(uap, fd);
	cmd = SCARG(uap, cmd);
	arg = SCARG(uap, arg);

	switch (cmd) {

	case LINUX_F_DUPFD:
		cmd = F_DUPFD;
		break;

	case LINUX_F_GETFD:
		cmd = F_GETFD;
		break;

	case LINUX_F_SETFD:
		cmd = F_SETFD;
		break;

	case LINUX_F_GETFL:
		/* Fetch the native flags, then report them as Linux values. */
		SCARG(&fca, fd) = fd;
		SCARG(&fca, cmd) = F_GETFL;
		SCARG(&fca, arg) = arg;
		if ((error = sys_fcntl(l, &fca, retval)))
			return error;
		retval[0] = bsd_to_linux_ioflags(retval[0]);
		return 0;

	case LINUX_F_SETFL: {
		/* Non-NULL only while we hold a reference on a pipe-like file. */
		file_t	*fp1 = NULL;

		val = linux_to_bsd_ioflags((unsigned long)SCARG(uap, arg));
		/*
		 * Linux seems to have same semantics for sending SIGIO to the
		 * read side of socket, but slightly different semantics
		 * for SIGIO to the write side.  Rather than sending the SIGIO
		 * every time it's possible to write (directly) more data, it
		 * only sends SIGIO if last write(2) failed due to insufficient
		 * memory to hold the data. This is compatible enough
		 * with NetBSD semantics to not do anything about the
		 * difference.
		 *
		 * Linux does NOT send SIGIO for pipes. Deal with socketpair
		 * ones and DTYPE_PIPE ones. For these, we don't set
		 * the underlying flags (we don't pass O_ASYNC flag down
		 * to sys_fcntl()), but set the FASYNC flag for file descriptor,
		 * so that F_GETFL would report the ASYNC i/o is on.
		 */
		if (val & O_ASYNC) {
			if (((fp1 = fd_getfile(fd)) == NULL))
			    return (EBADF);
			if (((fp1->f_type == DTYPE_SOCKET) && fp1->f_data
			      && ((struct socket *)fp1->f_data)->so_state & SS_ISAPIPE)
			    || (fp1->f_type == DTYPE_PIPE))
				val &= ~O_ASYNC;
			else {
				/* not a pipe, do not modify anything */
				fd_putfile(fd);
				fp1 = NULL;
			}
		}

		SCARG(&fca, fd) = fd;
		SCARG(&fca, cmd) = F_SETFL;
		SCARG(&fca, arg) = (void *) val;

		error = sys_fcntl(l, &fca, retval);

		/* Now set the FASYNC flag for pipes */
		if (fp1) {
			if (!error) {
				mutex_enter(&fp1->f_lock);
				fp1->f_flag |= FASYNC;
				mutex_exit(&fp1->f_lock);
			}
			/* Drop the reference taken before the sys_fcntl() call. */
			fd_putfile(fd);
		}

		return (error);
	    }

	/*
	 * NOTE(review): do_linux_getlk()/do_linux_setlk() are macros defined
	 * outside this file.  No break follows them, so they presumably
	 * return from this function themselves -- confirm against their
	 * definitions.
	 */
	case LINUX_F_GETLK:
		do_linux_getlk(fd, cmd, arg, linux, flock);

	case LINUX_F_SETLK:
	case LINUX_F_SETLKW:
		do_linux_setlk(fd, cmd, arg, linux, flock, LINUX_F_SETLK);

	case LINUX_F_SETOWN:
	case LINUX_F_GETOWN:
		/*
		 * We need to route fcntl() for tty descriptors around normal
		 * fcntl(), since NetBSD tty TIOC{G,S}PGRP semantics is too
		 * restrictive for Linux F_{G,S}ETOWN. For non-tty descriptors,
		 * this is not a problem.
		 */
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		/* Check it's a character device vnode */
		if (fp->f_type != DTYPE_VNODE
		    || (vp = (struct vnode *)fp->f_data) == NULL
		    || vp->v_type != VCHR) {
			fd_putfile(fd);

			/*
			 * Also reached by goto from below, after the file
			 * reference has already been released there.
			 */
	    not_tty:
			/* Not a tty, proceed with common fcntl() */
			cmd = cmd == LINUX_F_SETOWN ? F_SETOWN : F_GETOWN;
			break;
		}

		/* Fetch the attributes to learn the device number (va_rdev). */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &va, l->l_cred);
		VOP_UNLOCK(vp);

		fd_putfile(fd);

		if (error)
			return error;

		if ((tp = cdev_tty(va.va_rdev)) == NULL)
			goto not_tty;

		/* set tty pg_id appropriately */
		mutex_enter(&proc_lock);
		if (cmd == LINUX_F_GETOWN) {
			retval[0] = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
			mutex_exit(&proc_lock);
			return 0;
		}
		/*
		 * LINUX_F_SETOWN: a non-positive argument is a negated process
		 * group id; a positive one is a pid whose process group is used.
		 */
		if ((long)arg <= 0) {
			pgid = -(long)arg;
		} else {
			struct proc *p1 = proc_find((long)arg);
			if (p1 == NULL) {
				mutex_exit(&proc_lock);
				return (ESRCH);
			}
			pgid = (long)p1->p_pgrp->pg_id;
		}
		/* The group must exist and belong to the caller's session. */
		pgrp = pgrp_find(pgid);
		if (pgrp == NULL || pgrp->pg_session != p->p_session) {
			mutex_exit(&proc_lock);
			return EPERM;
		}
		tp->t_pgrp = pgrp;
		mutex_exit(&proc_lock);
		return 0;

	case LINUX_F_DUPFD_CLOEXEC:
		cmd = F_DUPFD_CLOEXEC;
		break;

	case LINUX_F_ADD_SEALS:
		cmd = F_ADD_SEALS;
		break;

	case LINUX_F_GET_SEALS:
		cmd = F_GET_SEALS;
		break;

	default:
		return EOPNOTSUPP;
	}

	/* Common path: hand the translated command to the native fcntl(). */
	SCARG(&fca, fd) = fd;
	SCARG(&fca, cmd) = cmd;
	SCARG(&fca, arg) = arg;

	return sys_fcntl(l, &fca, retval);
}
    471 
    472 #if !defined(__aarch64__) && !defined(__amd64__)
    473 /*
    474  * Convert a NetBSD stat structure to a Linux stat structure.
    475  * Only the order of the fields and the padding in the structure
    476  * is different. linux_fakedev is a machine-dependent function
    477  * which optionally converts device driver major/minor numbers
    478  * (XXX horrible, but what can you do against code that compares
    479  * things against constant major device numbers? sigh)
    480  */
    481 static void
    482 bsd_to_linux_stat(struct stat *bsp, struct linux_stat *lsp)
    483 {
    484 
    485 	memset(lsp, 0, sizeof(*lsp));
    486 	lsp->lst_dev     = linux_fakedev(bsp->st_dev, 0);
    487 	lsp->lst_ino     = bsp->st_ino;
    488 	lsp->lst_mode    = (linux_mode_t)bsp->st_mode;
    489 	if (bsp->st_nlink >= (1 << 15))
    490 		lsp->lst_nlink = (1 << 15) - 1;
    491 	else
    492 		lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink;
    493 	lsp->lst_uid     = bsp->st_uid;
    494 	lsp->lst_gid     = bsp->st_gid;
    495 	lsp->lst_rdev    = linux_fakedev(bsp->st_rdev, 1);
    496 	lsp->lst_size    = bsp->st_size;
    497 	lsp->lst_blksize = bsp->st_blksize;
    498 	lsp->lst_blocks  = bsp->st_blocks;
    499 	lsp->lst_atime   = bsp->st_atime;
    500 	lsp->lst_mtime   = bsp->st_mtime;
    501 	lsp->lst_ctime   = bsp->st_ctime;
    502 #ifdef LINUX_STAT_HAS_NSEC
    503 	lsp->lst_atime_nsec   = bsp->st_atimensec;
    504 	lsp->lst_mtime_nsec   = bsp->st_mtimensec;
    505 	lsp->lst_ctime_nsec   = bsp->st_ctimensec;
    506 #endif
    507 }
    508 
    509 /*
    510  * The stat functions below are plain sailing. stat and lstat are handled
    511  * by one function to avoid code duplication.
    512  */
    513 int
    514 linux_sys_fstat(struct lwp *l, const struct linux_sys_fstat_args *uap, register_t *retval)
    515 {
    516 	/* {
    517 		syscallarg(int) fd;
    518 		syscallarg(linux_stat *) sp;
    519 	} */
    520 	struct linux_stat tmplst;
    521 	struct stat tmpst;
    522 	int error;
    523 
    524 	error = do_sys_fstat(SCARG(uap, fd), &tmpst);
    525 	if (error != 0)
    526 		return error;
    527 	bsd_to_linux_stat(&tmpst, &tmplst);
    528 
    529 	return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst);
    530 }
    531 
    532 static int
    533 linux_stat1(const struct linux_sys_stat_args *uap, register_t *retval, int flags)
    534 {
    535 	struct linux_stat tmplst;
    536 	struct stat tmpst;
    537 	int error;
    538 
    539 	error = do_sys_stat(SCARG(uap, path), flags, &tmpst);
    540 	if (error != 0)
    541 		return error;
    542 
    543 	bsd_to_linux_stat(&tmpst, &tmplst);
    544 
    545 	return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst);
    546 }
    547 
    548 int
    549 linux_sys_stat(struct lwp *l, const struct linux_sys_stat_args *uap, register_t *retval)
    550 {
    551 	/* {
    552 		syscallarg(const char *) path;
    553 		syscallarg(struct linux_stat *) sp;
    554 	} */
    555 
    556 	return linux_stat1(uap, retval, FOLLOW);
    557 }
    558 
    559 /* Note: this is "newlstat" in the Linux sources */
    560 /*	(we don't bother with the old lstat currently) */
    561 int
    562 linux_sys_lstat(struct lwp *l, const struct linux_sys_lstat_args *uap, register_t *retval)
    563 {
    564 	/* {
    565 		syscallarg(const char *) path;
    566 		syscallarg(struct linux_stat *) sp;
    567 	} */
    568 
    569 	return linux_stat1((const void *)uap, retval, NOFOLLOW);
    570 }
    571 #endif /* !__aarch64__ && !__amd64__ */
    572 
    573 /*
    574  * The following syscalls are mostly here because of the alternate path check.
    575  */
    576 
    577 int
    578 linux_sys_linkat(struct lwp *l, const struct linux_sys_linkat_args *uap, register_t *retval)
    579 {
    580 	/* {
    581 		syscallarg(int) fd1;
    582 		syscallarg(const char *) name1;
    583 		syscallarg(int) fd2;
    584 		syscallarg(const char *) name2;
    585 		syscallarg(int) flags;
    586 	} */
    587 	int fd1 = SCARG(uap, fd1);
    588 	const char *name1 = SCARG(uap, name1);
    589 	int fd2 = SCARG(uap, fd2);
    590 	const char *name2 = SCARG(uap, name2);
    591 	int follow;
    592 
    593 	follow = SCARG(uap, flags) & LINUX_AT_SYMLINK_FOLLOW;
    594 
    595 	return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
    596 }
    597 
    598 static int
    599 linux_unlink_dircheck(const char *path)
    600 {
    601 	struct nameidata nd;
    602 	struct pathbuf *pb;
    603 	int error;
    604 
    605 	/*
    606 	 * Linux returns EISDIR if unlink(2) is called on a directory.
    607 	 * We return EPERM in such cases. To emulate correct behaviour,
    608 	 * check if the path points to directory and return EISDIR if this
    609 	 * is the case.
    610 	 *
    611 	 * XXX this should really not copy in the path buffer twice...
    612 	 */
    613 	error = pathbuf_copyin(path, &pb);
    614 	if (error) {
    615 		return error;
    616 	}
    617 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
    618 	if (namei(&nd) == 0) {
    619 		struct stat sb;
    620 
    621 		if (vn_stat(nd.ni_vp, &sb) == 0
    622 		    && S_ISDIR(sb.st_mode))
    623 			error = EISDIR;
    624 
    625 		vput(nd.ni_vp);
    626 	}
    627 	pathbuf_destroy(pb);
    628 	return error ? error : EPERM;
    629 }
    630 
    631 int
    632 linux_sys_unlink(struct lwp *l, const struct linux_sys_unlink_args *uap, register_t *retval)
    633 {
    634 	/* {
    635 		syscallarg(const char *) path;
    636 	} */
    637 	int error;
    638 
    639 	error = sys_unlink(l, (const void *)uap, retval);
    640 	if (error == EPERM)
    641 		error = linux_unlink_dircheck(SCARG(uap, path));
    642 
    643 	return error;
    644 }
    645 
    646 int
    647 linux_sys_unlinkat(struct lwp *l, const struct linux_sys_unlinkat_args *uap, register_t *retval)
    648 {
    649 	/* {
    650 		syscallarg(int) fd;
    651 		syscallarg(const char *) path;
    652 		syscallarg(int) flag;
    653 	} */
    654 	struct sys_unlinkat_args ua;
    655 	int error;
    656 
    657 	SCARG(&ua, fd) = SCARG(uap, fd);
    658 	SCARG(&ua, path) = SCARG(uap, path);
    659 	SCARG(&ua, flag) = linux_to_bsd_atflags(SCARG(uap, flag));
    660 
    661 	error = sys_unlinkat(l, &ua, retval);
    662 	if (error == EPERM)
    663 		error = linux_unlink_dircheck(SCARG(uap, path));
    664 
    665 	return error;
    666 }
    667 
    668 int
    669 linux_sys_mknod(struct lwp *l, const struct linux_sys_mknod_args *uap, register_t *retval)
    670 {
    671 	/* {
    672 		syscallarg(const char *) path;
    673 		syscallarg(linux_umode_t) mode;
    674 		syscallarg(unsigned) dev;
    675 	} */
    676 	struct linux_sys_mknodat_args ua;
    677 
    678 	SCARG(&ua, fd) = LINUX_AT_FDCWD;
    679 	SCARG(&ua, path) = SCARG(uap, path);
    680 	SCARG(&ua, mode) = SCARG(uap, mode);
    681 	SCARG(&ua, dev) = SCARG(uap, dev);
    682 
    683 	return linux_sys_mknodat(l, &ua, retval);
    684 }
    685 
    686 int
    687 linux_sys_mknodat(struct lwp *l, const struct linux_sys_mknodat_args *uap, register_t *retval)
    688 {
    689 	/* {
    690 		syscallarg(int) fd;
    691 		syscallarg(const char *) path;
    692 		syscallarg(linux_umode_t) mode;
    693 		syscallarg(unsigned) dev;
    694 	} */
    695 
    696 	/*
    697 	 * BSD handles FIFOs separately
    698 	 */
    699 	if (S_ISFIFO(SCARG(uap, mode))) {
    700 		struct sys_mkfifoat_args bma;
    701 
    702 		SCARG(&bma, fd) = SCARG(uap, fd);
    703 		SCARG(&bma, path) = SCARG(uap, path);
    704 		SCARG(&bma, mode) = SCARG(uap, mode);
    705 		return sys_mkfifoat(l, &bma, retval);
    706 	} else {
    707 
    708 		/*
    709 		 * Linux device numbers uses 8 bits for minor and 8 bits
    710 		 * for major. Due to how we map our major and minor,
    711 		 * this just fits into our dev_t. Just mask off the
    712 		 * upper 16bit to remove any random junk.
    713 		 */
    714 
    715 		return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
    716 		    SCARG(uap, mode), SCARG(uap, dev) & 0xffff, UIO_USERSPACE);
    717 	}
    718 }
    719 
    720 int
    721 linux_sys_fchmodat(struct lwp *l, const struct linux_sys_fchmodat_args *uap, register_t *retval)
    722 {
    723 	/* {
    724 		syscallarg(int) fd;
    725 		syscallarg(const char *) path;
    726 		syscallarg(linux_umode_t) mode;
    727 	} */
    728 
    729 	return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
    730 			      SCARG(uap, mode), AT_SYMLINK_FOLLOW);
    731 }
    732 
    733 int
    734 linux_sys_fchownat(struct lwp *l, const struct linux_sys_fchownat_args *uap, register_t *retval)
    735 {
    736 	/* {
    737 		syscallarg(int) fd;
    738 		syscallarg(const char *) path;
    739 		syscallarg(uid_t) owner;
    740 		syscallarg(gid_t) group;
    741 		syscallarg(int) flag;
    742 	} */
    743 	int flag;
    744 
    745 	flag = linux_to_bsd_atflags(SCARG(uap, flag));
    746 	return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
    747 			      SCARG(uap, owner), SCARG(uap, group), flag);
    748 }
    749 
    750 int
    751 linux_sys_faccessat(struct lwp *l, const struct linux_sys_faccessat_args *uap, register_t *retval)
    752 {
    753 	/* {
    754 		syscallarg(int) fd;
    755 		syscallarg(const char *) path;
    756 		syscallarg(int) amode;
    757 	} */
    758 
    759 	return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
    760 	     SCARG(uap, amode), AT_SYMLINK_FOLLOW);
    761 }
    762 
    763 /*
    764  * This is just fsync() for now (just as it is in the Linux kernel)
    765  * Note: this is not implemented under Linux on Alpha and Arm
    766  *	but should still be defined in our syscalls.master.
    767  *	(syscall #148 on the arm)
    768  */
    769 int
    770 linux_sys_fdatasync(struct lwp *l, const struct linux_sys_fdatasync_args *uap, register_t *retval)
    771 {
    772 	/* {
    773 		syscallarg(int) fd;
    774 	} */
    775 
    776 	return sys_fsync(l, (const void *)uap, retval);
    777 }
    778 
    779 /*
    780  * pread(2).
    781  */
    782 int
    783 linux_sys_pread(struct lwp *l, const struct linux_sys_pread_args *uap, register_t *retval)
    784 {
    785 	/* {
    786 		syscallarg(int) fd;
    787 		syscallarg(void *) buf;
    788 		syscallarg(size_t) nbyte;
    789 		syscallarg(off_t) offset;
    790 	} */
    791 	struct sys_pread_args pra;
    792 
    793 	SCARG(&pra, fd) = SCARG(uap, fd);
    794 	SCARG(&pra, buf) = SCARG(uap, buf);
    795 	SCARG(&pra, nbyte) = SCARG(uap, nbyte);
    796 	SCARG(&pra, PAD) = 0;
    797 	SCARG(&pra, offset) = SCARG(uap, offset);
    798 
    799 	return sys_pread(l, &pra, retval);
    800 }
    801 
    802 /*
    803  * pwrite(2).
    804  */
    805 int
    806 linux_sys_pwrite(struct lwp *l, const struct linux_sys_pwrite_args *uap, register_t *retval)
    807 {
    808 	/* {
    809 		syscallarg(int) fd;
    810 		syscallarg(void *) buf;
    811 		syscallarg(size_t) nbyte;
    812 		syscallarg(off_t) offset;
    813 	} */
    814 	struct sys_pwrite_args pra;
    815 
    816 	SCARG(&pra, fd) = SCARG(uap, fd);
    817 	SCARG(&pra, buf) = SCARG(uap, buf);
    818 	SCARG(&pra, nbyte) = SCARG(uap, nbyte);
    819 	SCARG(&pra, PAD) = 0;
    820 	SCARG(&pra, offset) = SCARG(uap, offset);
    821 
    822 	return sys_pwrite(l, &pra, retval);
    823 }
    824 
    825 /*
    826  * preadv(2)
    827  */
    828 int
    829 linux_sys_preadv(struct lwp *l, const struct linux_sys_preadv_args *uap,
    830     register_t *retval)
    831 {
    832 	/* {
    833 		syscallarg(int) fd;
    834 		syscallarg(const struct iovec *) iovp;
    835 		syscallarg(int) iovcnt;
    836 		syscallarg(unsigned long) off_lo;
    837 		syscallarg(unsigned long) off_hi;
    838 	} */
    839 	struct sys_preadv_args ua;
    840 
    841 	SCARG(&ua, fd) = SCARG(uap, fd);
    842 	SCARG(&ua, iovp) = SCARG(uap, iovp);
    843 	SCARG(&ua, iovcnt) = SCARG(uap, iovcnt);
    844 	SCARG(&ua, PAD) = 0;
    845 	SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi),
    846 						 SCARG(uap, off_lo));
    847 	return sys_preadv(l, &ua, retval);
    848 }
    849 
    850 /*
    851  * pwritev(2)
    852  */
    853 int
    854 linux_sys_pwritev(struct lwp *l, const struct linux_sys_pwritev_args *uap,
    855     register_t *retval)
    856 {
    857 	/* {
    858 		syscallarg(int) fd;
    859 		syscallarg(const struct iovec *) iovp;
    860 		syscallarg(int) iovcnt;
    861 		syscallarg(unsigned long) off_lo;
    862 		syscallarg(unsigned long) off_hi;
    863 	} */
    864 	struct sys_pwritev_args ua;
    865 
    866 	SCARG(&ua, fd) = SCARG(uap, fd);
    867 	SCARG(&ua, iovp) = (const void *)SCARG(uap, iovp);
    868 	SCARG(&ua, iovcnt) = SCARG(uap, iovcnt);
    869 	SCARG(&ua, PAD) = 0;
    870 	SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi),
    871 						 SCARG(uap, off_lo));
    872 	return sys_pwritev(l, &ua, retval);
    873 }
    874 
    875 int
    876 linux_sys_dup3(struct lwp *l, const struct linux_sys_dup3_args *uap,
    877     register_t *retval)
    878 {
    879 	/* {
    880 		syscallarg(int) from;
    881 		syscallarg(int) to;
    882 		syscallarg(int) flags;
    883 	} */
    884 	int flags;
    885 
    886 	flags = linux_to_bsd_ioflags(SCARG(uap, flags));
    887 	if ((flags & ~O_CLOEXEC) != 0)
    888 		return EINVAL;
    889 
    890 	if (SCARG(uap, from) == SCARG(uap, to))
    891 		return EINVAL;
    892 
    893 	return dodup(l, SCARG(uap, from), SCARG(uap, to), flags, retval);
    894 }
    895 
    896 
    897 int
    898 linux_to_bsd_atflags(int lflags)
    899 {
    900 	int bflags = 0;
    901 
    902 	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
    903 		bflags |= AT_SYMLINK_NOFOLLOW;
    904 	if (lflags & LINUX_AT_REMOVEDIR)
    905 		bflags |= AT_REMOVEDIR;
    906 	if (lflags & LINUX_AT_SYMLINK_FOLLOW)
    907 		bflags |= AT_SYMLINK_FOLLOW;
    908 
    909 	return bflags;
    910 }
    911 
    912 int
    913 linux_sys_faccessat2(lwp_t *l, const struct linux_sys_faccessat2_args *uap,
    914     register_t *retval)
    915 {
    916 	/* {
    917 		syscallarg(int) fd;
    918 		syscallarg(const char *) path;
    919 		syscallarg(int) amode;
    920 		syscallarg(int) flags;
    921 	}*/
    922 	int flag = linux_to_bsd_atflags(SCARG(uap, flags));
    923 	int mode = SCARG(uap, amode);
    924 	int fd = SCARG(uap, fd);
    925 	const char *path = SCARG(uap, path);
    926 
    927 	return do_sys_accessat(l, fd, path, mode, flag);
    928 }
    929 
    930 
    931 int
    932 linux_sys_sync_file_range(lwp_t *l,
    933     const struct linux_sys_sync_file_range_args *uap, register_t *retval)
    934 {
    935 	/* {
    936 		syscallarg(int) fd;
    937 		syscallarg(off_t) offset;
    938 		syscallarg(off_t) nbytes;
    939 		syscallarg(unsigned int) flags;
    940 	} */
    941 
    942 	struct sys_fsync_range_args ua;
    943 
    944 	if (SCARG(uap, offset) < 0 || SCARG(uap, nbytes) < 0 ||
    945 	    ((SCARG(uap, flags) & ~LINUX_SYNC_FILE_RANGE_ALL) != 0))
    946 		return EINVAL;
    947 
    948 	/* Fill ua with uap */
    949 	SCARG(&ua, fd) = SCARG(uap, fd);
    950 	SCARG(&ua, flags) = SCARG(uap, flags);
    951 
    952 	/* Round down offset to page boundary */
    953 	SCARG(&ua, start) = rounddown(SCARG(uap, offset), PAGE_SIZE);
    954 	SCARG(&ua, length) = SCARG(uap, nbytes);
    955 	if (SCARG(&ua, length) != 0) {
    956 		/* Round up length to nbytes+offset to page boundary */
    957 		SCARG(&ua, length) = roundup(SCARG(uap, nbytes)
    958 		    + SCARG(uap, offset) - SCARG(&ua, start), PAGE_SIZE);
    959 	}
    960 
    961 	return sys_fsync_range(l, &ua, retval);
    962 }
    963 
    964 int
    965 linux_sys_syncfs(lwp_t *l, const struct linux_sys_syncfs_args *uap,
    966     register_t *retval)
    967 {
    968 	/* {
    969 		syscallarg(int) fd;
    970 	} */
    971 
    972 	struct mount *mp;
    973 	struct vnode *vp;
    974 	file_t *fp;
    975 	int error, fd;
    976 	fd = SCARG(uap, fd);
    977 
    978 	/* Get file pointer */
    979 	if ((error = fd_getvnode(fd, &fp)) != 0)
    980 		return error;
    981 
    982 	/* Get vnode and mount point */
    983 	vp = fp->f_vnode;
    984 	mp = vp->v_mount;
    985 
    986 	mutex_enter(mp->mnt_updating);
    987 	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
    988 		int asyncflag = mp->mnt_flag & MNT_ASYNC;
    989 		mp->mnt_flag &= ~MNT_ASYNC;
    990 		VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
    991 		if (asyncflag)
    992 			mp->mnt_flag |= MNT_ASYNC;
    993 	}
    994 	mutex_exit(mp->mnt_updating);
    995 
    996 	/* Cleanup vnode and file pointer */
    997 	vrele(vp);
    998 	fd_putfile(fd);
    999 	return 0;
   1000 
   1001 }
   1002 
   1003 int
   1004 linux_sys_renameat2(struct lwp *l, const struct linux_sys_renameat2_args *uap,
   1005     register_t *retval)
   1006 {
   1007 	/* {
   1008 		syscallarg(int) fromfd;
   1009 		syscallarg(const char *) from;
   1010 		syscallarg(int) tofd;
   1011 		syscallarg(const char *) to;
   1012 		syscallarg(unsigned int) flags;
   1013 	} */
   1014 
   1015 	struct sys_renameat_args ua;
   1016 	SCARG(&ua, fromfd) = SCARG(uap, fromfd);
   1017 	SCARG(&ua, from) = SCARG(uap, from);
   1018 	SCARG(&ua, tofd) = SCARG(uap, tofd);
   1019 	SCARG(&ua, to) = SCARG(uap, to);
   1020 
   1021 	unsigned int flags = SCARG(uap, flags);
   1022 	int error;
   1023 
   1024 	if (flags != 0) {
   1025 		if (flags & ~LINUX_RENAME_ALL)
   1026 			return EINVAL;
   1027 		if ((flags & LINUX_RENAME_EXCHANGE) != 0 &&
   1028 		    (flags & (LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
   1029 		    != 0)
   1030 			return EINVAL;
   1031 		/*
   1032 		 * Suppoting renameat2 flags without support from file systems
   1033 		 * becomes a messy affair cause of locks and how VOP_RENAME
   1034 		 * protocol is implemented. So, return EOPNOTSUPP for now.
   1035 		 */
   1036 		return EOPNOTSUPP;
   1037 	}
   1038 
   1039 	error = sys_renameat(l, &ua, retval);
   1040 	return error;
   1041 }
   1042 
   1043 int linux_sys_copy_file_range(lwp_t *l,
   1044     const struct linux_sys_copy_file_range_args *uap, register_t *retval)
   1045 {
   1046 	/* {
   1047 		syscallarg(int) fd_in;
   1048 		syscallarg(unsigned long) off_in;
   1049 		syscallarg(int) fd_out;
   1050 		syscallarg(unsigned long) off_out;
   1051 		syscallarg(size_t) len;
   1052 		syscallarg(unsigned int) flags;
   1053 	} */
   1054 
   1055 	int fd_in, fd_out;
   1056 	file_t *fp_in, *fp_out;
   1057 	struct vnode *invp, *outvp;
   1058 	off_t off_in = 0, off_out = 0;
   1059 	struct vattr vattr_in, vattr_out;
   1060 	ssize_t total_copied = 0;
   1061 	size_t bytes_left, to_copy;
   1062 	bool have_off_in = false, have_off_out = false;
   1063 	int error = 0;
   1064 	size_t len = SCARG(uap, len);
   1065 	unsigned int flags = SCARG(uap, flags);
   1066 	// Structures for actual copy
   1067 	char *buffer = NULL;
   1068 	struct uio auio;
   1069 	struct iovec aiov;
   1070 
   1071 
   1072 	if (len > SSIZE_MAX) {
   1073 		DPRINTF("%s: len is greater than SSIZE_MAX\n",
   1074 		    __func__);
   1075 		return EOVERFLOW;
   1076 	}
   1077 
   1078 	if(flags != 0) {
   1079 		DPRINTF("%s: unsupported flags %#x\n", __func__, flags);
   1080 		return EINVAL;
   1081 	}
   1082 
   1083 	fd_in = SCARG(uap, fd_in);
   1084 	fd_out = SCARG(uap, fd_out);
   1085 	error = fd_getvnode(fd_in, &fp_in);
   1086 	if (error) {
   1087 		return error;
   1088 	}
   1089 
   1090 	error = fd_getvnode(fd_out, &fp_out);
   1091 	if (error) {
   1092 		    fd_putfile(fd_in);
   1093 		    return error;
   1094 	}
   1095 
   1096 	invp = fp_in->f_vnode;
   1097 	outvp = fp_out->f_vnode;
   1098 
   1099 	/* Get attributes of input and output files */
   1100 	VOP_GETATTR(invp, &vattr_in, l->l_cred);
   1101 	VOP_GETATTR(outvp, &vattr_out, l->l_cred);
   1102 
   1103 	/* Check if input and output files are regular files */
   1104 	if (vattr_in.va_type == VDIR || vattr_out.va_type == VDIR) {
   1105 		error = EISDIR;
   1106 		DPRINTF("%s: Input or output is a directory\n", __func__);
   1107 		goto out;
   1108 	}
   1109 	if ((SCARG(uap, off_in) != NULL && *SCARG(uap, off_in) < 0) ||
   1110 	   (SCARG(uap, off_out) != NULL && *SCARG(uap, off_out) < 0) ||
   1111 	   vattr_in.va_type != VREG || vattr_out.va_type != VREG)
   1112         {
   1113 		error = EINVAL;
   1114 		DPRINTF("%s: Invalid offset or file type\n", __func__);
   1115 		goto out;
   1116 	}
   1117 
   1118 	if ((fp_in->f_flag & FREAD) == 0 ||
   1119 	    (fp_out->f_flag & FWRITE) == 0 || (fp_out->f_flag & FAPPEND) != 0) {
   1120 		DPRINTF("%s: input file can't be read or output file "
   1121 		    "can't be written\n", __func__);
   1122 		error = EBADF;
   1123 		goto out;
   1124 	}
   1125 	/* Retrieve and validate offsets if provided */
   1126 	if (SCARG(uap, off_in) != NULL) {
   1127 	    error = copyin(SCARG(uap, off_in), &off_in, sizeof(off_in));
   1128 	    if (error) {
   1129 		    goto out;
   1130 	    }
   1131 	    have_off_in = true;
   1132 	}
   1133 
   1134 	if (SCARG(uap, off_out) != NULL) {
   1135 	    error = copyin(SCARG(uap, off_out), &off_out, sizeof(off_out));
   1136 	    if (error) {
   1137 		    goto out;
   1138 	    }
   1139 	    have_off_out = true;
   1140 	}
   1141 
   1142 	off_t new_size = off_out + len;
   1143 	if (new_size < 0) {
   1144 		DPRINTF("%s: New size is greater than OFF_MAX\n", __func__);
   1145 		error = EFBIG;
   1146 		goto out;
   1147 	}
   1148 
   1149 	/* Identify overlapping ranges */
   1150 	if ((invp == outvp) &&
   1151 	    ((off_in <= off_out && off_in + (off_t)len > off_out) ||
   1152 	    (off_in > off_out && off_out + (off_t)len > off_in))) {
   1153 		DPRINTF("%s: Ranges overlap\n", __func__);
   1154 		error = EINVAL;
   1155 		goto out;
   1156 	}
   1157 
   1158 	buffer = kmem_alloc(LINUX_COPY_FILE_RANGE_MAX_CHUNK, KM_SLEEP);
   1159 	/* Allocation cannot fail, so no need for error handling? */
   1160 	if (buffer == NULL) {
   1161 		error = ENOMEM;
   1162 		goto out;
   1163 	}
   1164 
   1165 	bytes_left = len;
   1166 
   1167 	while (bytes_left > 0) {
   1168 		to_copy = MIN(bytes_left, LINUX_COPY_FILE_RANGE_MAX_CHUNK);
   1169 
   1170 		/* Lock the input vnode for reading */
   1171 		vn_lock(fp_in->f_vnode, LK_SHARED | LK_RETRY);
   1172 		/* Set up iovec and uio for reading */
   1173 		aiov.iov_base = buffer;
   1174 		aiov.iov_len = to_copy;
   1175 		auio.uio_iov = &aiov;
   1176 		auio.uio_iovcnt = 1;
   1177 		auio.uio_offset = have_off_in ? off_in : fp_in->f_offset;
   1178 		auio.uio_resid = to_copy;
   1179 		auio.uio_rw = UIO_READ;
   1180 		auio.uio_vmspace = l->l_proc->p_vmspace;
   1181 		UIO_SETUP_SYSSPACE(&auio);
   1182 
   1183 		/* Perform read using vn_read */
   1184 		error = VOP_READ(fp_in->f_vnode, &auio, 0, l->l_cred);
   1185 		VOP_UNLOCK(fp_in->f_vnode);
   1186 		if (error) {
   1187 			DPRINTF("%s: Read error %d\n", __func__, error);
   1188 			break;
   1189 		}
   1190 
   1191 		size_t read_bytes = to_copy - auio.uio_resid;
   1192 		if (read_bytes == 0) {
   1193 			/* EOF reached */
   1194 			break;
   1195 		}
   1196 
   1197 		/* Lock the output vnode for writing */
   1198 		vn_lock(fp_out->f_vnode, LK_EXCLUSIVE | LK_RETRY);
   1199 		/* Set up iovec and uio for writing */
   1200 		aiov.iov_base = buffer;
   1201 		aiov.iov_len = read_bytes;
   1202 		auio.uio_iov = &aiov;
   1203 		auio.uio_iovcnt = 1;
   1204 		auio.uio_offset = have_off_out ? off_out : fp_out->f_offset;
   1205 		auio.uio_resid = read_bytes;
   1206 		auio.uio_rw = UIO_WRITE;
   1207 		auio.uio_vmspace = l->l_proc->p_vmspace;
   1208 		UIO_SETUP_SYSSPACE(&auio);
   1209 
   1210 		/* Perform the write */
   1211 		error = VOP_WRITE(fp_out->f_vnode, &auio, 0, l->l_cred);
   1212 		VOP_UNLOCK(fp_out->f_vnode);
   1213 		if (error) {
   1214 			DPRINTF("%s: Write error %d\n", __func__, error);
   1215 			break;
   1216 		}
   1217 		size_t written_bytes = read_bytes - auio.uio_resid;
   1218 		total_copied += written_bytes;
   1219 		bytes_left -= written_bytes;
   1220 
   1221 		/* Update offsets if provided */
   1222 		if (have_off_in) {
   1223 			off_in += written_bytes;
   1224 		} else {
   1225 			fp_in->f_offset += written_bytes;
   1226 		}
   1227 		if (have_off_out) {
   1228 			off_out += written_bytes;
   1229 		} else {
   1230 			fp_out->f_offset += written_bytes;
   1231 		}
   1232 	}
   1233 
   1234 	if (have_off_in) {
   1235 		/* Adjust user space offset */
   1236 		error = copyout(&off_in, SCARG(uap, off_in), sizeof(off_t));
   1237 		if (error) {
   1238 			DPRINTF("%s: Error adjusting user space offset\n",
   1239 			    __func__);
   1240 		}
   1241 		goto out;
   1242 	}
   1243 
   1244 	if (have_off_out) {
   1245 		/* Adjust user space offset */
   1246 		error = copyout(&off_out, SCARG(uap, off_out), sizeof(off_t));
   1247 		if (error) {
   1248 			DPRINTF("%s: Error adjusting user space offset\n",
   1249 			    __func__);
   1250 		}
   1251 	}
   1252 
   1253 	*retval = total_copied;
   1254 out:
   1255 	if (buffer) {
   1256 		kmem_free(buffer, LINUX_COPY_FILE_RANGE_MAX_CHUNK);
   1257 	}
   1258 	if (fp_out) {
   1259 		fd_putfile(fd_out);
   1260 	}
   1261 	if (fp_in) {
   1262 		fd_putfile(fd_in);
   1263 	}
   1264 	return error;
   1265 }
   1266 
   1267 
   1268 #define LINUX_NOT_SUPPORTED(fun) \
   1269 int \
   1270 fun(struct lwp *l, const struct fun##_args *uap, register_t *retval) \
   1271 { \
   1272 	return EOPNOTSUPP; \
   1273 }
   1274 
   1275 LINUX_NOT_SUPPORTED(linux_sys_setxattr)
   1276 LINUX_NOT_SUPPORTED(linux_sys_lsetxattr)
   1277 LINUX_NOT_SUPPORTED(linux_sys_fsetxattr)
   1278 
   1279 LINUX_NOT_SUPPORTED(linux_sys_getxattr)
   1280 LINUX_NOT_SUPPORTED(linux_sys_lgetxattr)
   1281 LINUX_NOT_SUPPORTED(linux_sys_fgetxattr)
   1282 
   1283 LINUX_NOT_SUPPORTED(linux_sys_listxattr)
   1284 LINUX_NOT_SUPPORTED(linux_sys_llistxattr)
   1285 LINUX_NOT_SUPPORTED(linux_sys_flistxattr)
   1286 
   1287 LINUX_NOT_SUPPORTED(linux_sys_removexattr)
   1288 LINUX_NOT_SUPPORTED(linux_sys_lremovexattr)
   1289 LINUX_NOT_SUPPORTED(linux_sys_fremovexattr)
   1290 
   1291 /*
   1292  * For now just return EOPNOTSUPP, this makes glibc posix_fallocate()
   1293  * to fallback to emulation.
   1294  * XXX Right now no filesystem actually implements fallocate support,
   1295  * so no need for mapping.
   1296  */
   1297 LINUX_NOT_SUPPORTED(linux_sys_fallocate)
   1298