linux_file.c revision 1.124 1 /* $NetBSD: linux_file.c,v 1.124 2024/06/29 13:46:10 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden and Eric Haszlakiewicz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Functions in multiarch:
34 * linux_sys_llseek : linux_llseek.c
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.124 2024/06/29 13:46:10 christos Exp $");
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/stat.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/kernel.h>
50 #include <sys/mount.h>
51 #include <sys/namei.h>
52 #include <sys/vnode.h>
53 #include <sys/tty.h>
54 #include <sys/socketvar.h>
55 #include <sys/conf.h>
56 #include <sys/pipe.h>
57
58 #include <sys/syscallargs.h>
59 #include <sys/vfs_syscalls.h>
60
61 #include <compat/linux/common/linux_types.h>
62 #include <compat/linux/common/linux_signal.h>
63 #include <compat/linux/common/linux_fcntl.h>
64 #include <compat/linux/common/linux_util.h>
65 #include <compat/linux/common/linux_machdep.h>
66 #include <compat/linux/common/linux_ipc.h>
67 #include <compat/linux/common/linux_sem.h>
68
69 #include <compat/linux/linux_syscallargs.h>
70
71 static int bsd_to_linux_ioflags(int);
72 #if !defined(__aarch64__) && !defined(__amd64__)
73 static void bsd_to_linux_stat(struct stat *, struct linux_stat *);
74 #endif
75
76 conv_linux_flock(linux, flock)
77
/*
 * Some file-related calls are handled here. The usual flag conversion
 * and structure conversion is done, and alternate emul path searching.
 */
82
83 /*
84 * The next two functions convert between the Linux and NetBSD values
85 * of the flags used in open(2) and fcntl(2).
86 */
87 int
88 linux_to_bsd_ioflags(int lflags)
89 {
90 int res = 0;
91
92 res |= cvtto_bsd_mask(lflags, LINUX_O_WRONLY, O_WRONLY);
93 res |= cvtto_bsd_mask(lflags, LINUX_O_RDONLY, O_RDONLY);
94 res |= cvtto_bsd_mask(lflags, LINUX_O_RDWR, O_RDWR);
95
96 res |= cvtto_bsd_mask(lflags, LINUX_O_CREAT, O_CREAT);
97 res |= cvtto_bsd_mask(lflags, LINUX_O_EXCL, O_EXCL);
98 res |= cvtto_bsd_mask(lflags, LINUX_O_NOCTTY, O_NOCTTY);
99 res |= cvtto_bsd_mask(lflags, LINUX_O_TRUNC, O_TRUNC);
100 res |= cvtto_bsd_mask(lflags, LINUX_O_APPEND, O_APPEND);
101 res |= cvtto_bsd_mask(lflags, LINUX_O_NONBLOCK, O_NONBLOCK);
102 res |= cvtto_bsd_mask(lflags, LINUX_O_NDELAY, O_NDELAY);
103 res |= cvtto_bsd_mask(lflags, LINUX_O_SYNC, O_FSYNC);
104 res |= cvtto_bsd_mask(lflags, LINUX_FASYNC, O_ASYNC);
105 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECT, O_DIRECT);
106 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECTORY, O_DIRECTORY);
107 res |= cvtto_bsd_mask(lflags, LINUX_O_NOFOLLOW, O_NOFOLLOW);
108 res |= cvtto_bsd_mask(lflags, LINUX_O_CLOEXEC, O_CLOEXEC);
109
110 return res;
111 }
112
113 static int
114 bsd_to_linux_ioflags(int bflags)
115 {
116 int res = 0;
117
118 res |= cvtto_linux_mask(bflags, O_WRONLY, LINUX_O_WRONLY);
119 res |= cvtto_linux_mask(bflags, O_RDONLY, LINUX_O_RDONLY);
120 res |= cvtto_linux_mask(bflags, O_RDWR, LINUX_O_RDWR);
121
122 res |= cvtto_linux_mask(bflags, O_CREAT, LINUX_O_CREAT);
123 res |= cvtto_linux_mask(bflags, O_EXCL, LINUX_O_EXCL);
124 res |= cvtto_linux_mask(bflags, O_NOCTTY, LINUX_O_NOCTTY);
125 res |= cvtto_linux_mask(bflags, O_TRUNC, LINUX_O_TRUNC);
126 res |= cvtto_linux_mask(bflags, O_APPEND, LINUX_O_APPEND);
127 res |= cvtto_linux_mask(bflags, O_NONBLOCK, LINUX_O_NONBLOCK);
128 res |= cvtto_linux_mask(bflags, O_NDELAY, LINUX_O_NDELAY);
129 res |= cvtto_linux_mask(bflags, O_FSYNC, LINUX_O_SYNC);
130 res |= cvtto_linux_mask(bflags, O_ASYNC, LINUX_FASYNC);
131 res |= cvtto_linux_mask(bflags, O_DIRECT, LINUX_O_DIRECT);
132 res |= cvtto_linux_mask(bflags, O_DIRECTORY, LINUX_O_DIRECTORY);
133 res |= cvtto_linux_mask(bflags, O_NOFOLLOW, LINUX_O_NOFOLLOW);
134 res |= cvtto_linux_mask(bflags, O_CLOEXEC, LINUX_O_CLOEXEC);
135
136 return res;
137 }
138
/*
 * linux_hilo_to_off_t: combine the two "unsigned long" halves of a Linux
 * file offset argument into a single off_t.
 */
static inline off_t
linux_hilo_to_off_t(unsigned long hi, unsigned long lo)
{
#ifndef _LP64
	/* Not LP64: "hi" supplies bits 32 and up, "lo" the low bits. */
	off_t upper = (off_t)hi;

	return (upper << 32) | lo;
#else
	/*
	 * Linux discards the "hi" portion on LP64 platforms.  glibc still
	 * puts the upper 32 bits of the offset into the "hi" argument, but
	 * in this case the "lo" argument already carries all the bits.
	 */
	(void)hi;
	return (off_t)lo;
#endif /* !_LP64 */
}
155
156 #if !defined(__aarch64__)
157 /*
158 * creat(2) is an obsolete function, but it's present as a Linux
159 * system call, so let's deal with it.
160 *
161 * Note: On the Alpha this doesn't really exist in Linux, but it's defined
162 * in syscalls.master anyway so this doesn't have to be special cased.
163 *
164 * Just call open(2) with the TRUNC, CREAT and WRONLY flags.
165 */
166 int
167 linux_sys_creat(struct lwp *l, const struct linux_sys_creat_args *uap, register_t *retval)
168 {
169 /* {
170 syscallarg(const char *) path;
171 syscallarg(linux_umode_t) mode;
172 } */
173 struct sys_open_args oa;
174
175 SCARG(&oa, path) = SCARG(uap, path);
176 SCARG(&oa, flags) = O_CREAT | O_TRUNC | O_WRONLY;
177 SCARG(&oa, mode) = SCARG(uap, mode);
178
179 return sys_open(l, &oa, retval);
180 }
181 #endif
182
183 static void
184 linux_open_ctty(struct lwp *l, int flags, int fd)
185 {
186 struct proc *p = l->l_proc;
187
188 /*
189 * this bit from sunos_misc.c (and svr4_fcntl.c).
190 * If we are a session leader, and we don't have a controlling
191 * terminal yet, and the O_NOCTTY flag is not set, try to make
192 * this the controlling terminal.
193 */
194 if (!(flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_lflag & PL_CONTROLT)) {
195 file_t *fp;
196
197 fp = fd_getfile(fd);
198
199 /* ignore any error, just give it a try */
200 if (fp != NULL) {
201 if (fp->f_type == DTYPE_VNODE) {
202 (fp->f_ops->fo_ioctl) (fp, TIOCSCTTY, NULL);
203 }
204 fd_putfile(fd);
205 }
206 }
207 }
208
209 /*
210 * open(2). Take care of the different flag values, and let the
211 * NetBSD syscall do the real work. See if this operation
212 * gives the current process a controlling terminal.
213 * (XXX is this necessary?)
214 */
215 int
216 linux_sys_open(struct lwp *l, const struct linux_sys_open_args *uap, register_t *retval)
217 {
218 /* {
219 syscallarg(const char *) path;
220 syscallarg(int) flags;
221 syscallarg(linux_umode_t) mode;
222 } */
223 int error, fl;
224 struct sys_open_args boa;
225
226 fl = linux_to_bsd_ioflags(SCARG(uap, flags));
227
228 SCARG(&boa, path) = SCARG(uap, path);
229 SCARG(&boa, flags) = fl;
230 SCARG(&boa, mode) = SCARG(uap, mode);
231
232 if ((error = sys_open(l, &boa, retval)))
233 return (error == EFTYPE) ? ELOOP : error;
234
235 linux_open_ctty(l, fl, *retval);
236 return 0;
237 }
238
239 int
240 linux_sys_openat(struct lwp *l, const struct linux_sys_openat_args *uap, register_t *retval)
241 {
242 /* {
243 syscallarg(int) fd;
244 syscallarg(const char *) path;
245 syscallarg(int) flags;
246 syscallarg(linux_umode_t) mode;
247 } */
248 int error, fl;
249 struct sys_openat_args boa;
250
251 fl = linux_to_bsd_ioflags(SCARG(uap, flags));
252
253 SCARG(&boa, fd) = SCARG(uap, fd);
254 SCARG(&boa, path) = SCARG(uap, path);
255 SCARG(&boa, oflags) = fl;
256 SCARG(&boa, mode) = SCARG(uap, mode);
257
258 if ((error = sys_openat(l, &boa, retval)))
259 return (error == EFTYPE) ? ELOOP : error;
260
261 linux_open_ctty(l, fl, *retval);
262 return 0;
263 }
264
265 /*
266 * Most actions in the fcntl() call are straightforward; simply
267 * pass control to the NetBSD system call. A few commands need
268 * conversions after the actual system call has done its work,
269 * because the flag values and lock structure are different.
270 */
271 int
272 linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_t *retval)
273 {
274 /* {
275 syscallarg(int) fd;
276 syscallarg(int) cmd;
277 syscallarg(void *) arg;
278 } */
279 struct proc *p = l->l_proc;
280 int fd, cmd, error;
281 u_long val;
282 void *arg;
283 struct sys_fcntl_args fca;
284 file_t *fp;
285 struct vnode *vp;
286 struct vattr va;
287 long pgid;
288 struct pgrp *pgrp;
289 struct tty *tp;
290
291 fd = SCARG(uap, fd);
292 cmd = SCARG(uap, cmd);
293 arg = SCARG(uap, arg);
294
295 switch (cmd) {
296
297 case LINUX_F_DUPFD:
298 cmd = F_DUPFD;
299 break;
300
301 case LINUX_F_GETFD:
302 cmd = F_GETFD;
303 break;
304
305 case LINUX_F_SETFD:
306 cmd = F_SETFD;
307 break;
308
309 case LINUX_F_GETFL:
310 SCARG(&fca, fd) = fd;
311 SCARG(&fca, cmd) = F_GETFL;
312 SCARG(&fca, arg) = arg;
313 if ((error = sys_fcntl(l, &fca, retval)))
314 return error;
315 retval[0] = bsd_to_linux_ioflags(retval[0]);
316 return 0;
317
318 case LINUX_F_SETFL: {
319 file_t *fp1 = NULL;
320
321 val = linux_to_bsd_ioflags((unsigned long)SCARG(uap, arg));
322 /*
323 * Linux seems to have same semantics for sending SIGIO to the
324 * read side of socket, but slightly different semantics
325 * for SIGIO to the write side. Rather than sending the SIGIO
326 * every time it's possible to write (directly) more data, it
327 * only sends SIGIO if last write(2) failed due to insufficient
328 * memory to hold the data. This is compatible enough
329 * with NetBSD semantics to not do anything about the
330 * difference.
331 *
332 * Linux does NOT send SIGIO for pipes. Deal with socketpair
333 * ones and DTYPE_PIPE ones. For these, we don't set
334 * the underlying flags (we don't pass O_ASYNC flag down
335 * to sys_fcntl()), but set the FASYNC flag for file descriptor,
336 * so that F_GETFL would report the ASYNC i/o is on.
337 */
338 if (val & O_ASYNC) {
339 if (((fp1 = fd_getfile(fd)) == NULL))
340 return (EBADF);
341 if (((fp1->f_type == DTYPE_SOCKET) && fp1->f_data
342 && ((struct socket *)fp1->f_data)->so_state & SS_ISAPIPE)
343 || (fp1->f_type == DTYPE_PIPE))
344 val &= ~O_ASYNC;
345 else {
346 /* not a pipe, do not modify anything */
347 fd_putfile(fd);
348 fp1 = NULL;
349 }
350 }
351
352 SCARG(&fca, fd) = fd;
353 SCARG(&fca, cmd) = F_SETFL;
354 SCARG(&fca, arg) = (void *) val;
355
356 error = sys_fcntl(l, &fca, retval);
357
358 /* Now set the FASYNC flag for pipes */
359 if (fp1) {
360 if (!error) {
361 mutex_enter(&fp1->f_lock);
362 fp1->f_flag |= FASYNC;
363 mutex_exit(&fp1->f_lock);
364 }
365 fd_putfile(fd);
366 }
367
368 return (error);
369 }
370
371 case LINUX_F_GETLK:
372 do_linux_getlk(fd, cmd, arg, linux, flock);
373
374 case LINUX_F_SETLK:
375 case LINUX_F_SETLKW:
376 do_linux_setlk(fd, cmd, arg, linux, flock, LINUX_F_SETLK);
377
378 case LINUX_F_SETOWN:
379 case LINUX_F_GETOWN:
380 /*
381 * We need to route fcntl() for tty descriptors around normal
382 * fcntl(), since NetBSD tty TIOC{G,S}PGRP semantics is too
383 * restrictive for Linux F_{G,S}ETOWN. For non-tty descriptors,
384 * this is not a problem.
385 */
386 if ((fp = fd_getfile(fd)) == NULL)
387 return EBADF;
388
389 /* Check it's a character device vnode */
390 if (fp->f_type != DTYPE_VNODE
391 || (vp = (struct vnode *)fp->f_data) == NULL
392 || vp->v_type != VCHR) {
393 fd_putfile(fd);
394
395 not_tty:
396 /* Not a tty, proceed with common fcntl() */
397 cmd = cmd == LINUX_F_SETOWN ? F_SETOWN : F_GETOWN;
398 break;
399 }
400
401 vn_lock(vp, LK_SHARED | LK_RETRY);
402 error = VOP_GETATTR(vp, &va, l->l_cred);
403 VOP_UNLOCK(vp);
404
405 fd_putfile(fd);
406
407 if (error)
408 return error;
409
410 if ((tp = cdev_tty(va.va_rdev)) == NULL)
411 goto not_tty;
412
413 /* set tty pg_id appropriately */
414 mutex_enter(&proc_lock);
415 if (cmd == LINUX_F_GETOWN) {
416 retval[0] = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
417 mutex_exit(&proc_lock);
418 return 0;
419 }
420 if ((long)arg <= 0) {
421 pgid = -(long)arg;
422 } else {
423 struct proc *p1 = proc_find((long)arg);
424 if (p1 == NULL) {
425 mutex_exit(&proc_lock);
426 return (ESRCH);
427 }
428 pgid = (long)p1->p_pgrp->pg_id;
429 }
430 pgrp = pgrp_find(pgid);
431 if (pgrp == NULL || pgrp->pg_session != p->p_session) {
432 mutex_exit(&proc_lock);
433 return EPERM;
434 }
435 tp->t_pgrp = pgrp;
436 mutex_exit(&proc_lock);
437 return 0;
438
439 case LINUX_F_DUPFD_CLOEXEC:
440 cmd = F_DUPFD_CLOEXEC;
441 break;
442
443 case LINUX_F_ADD_SEALS:
444 cmd = F_ADD_SEALS;
445 break;
446
447 case LINUX_F_GET_SEALS:
448 cmd = F_GET_SEALS;
449 break;
450
451 default:
452 return EOPNOTSUPP;
453 }
454
455 SCARG(&fca, fd) = fd;
456 SCARG(&fca, cmd) = cmd;
457 SCARG(&fca, arg) = arg;
458
459 return sys_fcntl(l, &fca, retval);
460 }
461
462 #if !defined(__aarch64__) && !defined(__amd64__)
463 /*
464 * Convert a NetBSD stat structure to a Linux stat structure.
465 * Only the order of the fields and the padding in the structure
466 * is different. linux_fakedev is a machine-dependent function
467 * which optionally converts device driver major/minor numbers
468 * (XXX horrible, but what can you do against code that compares
469 * things against constant major device numbers? sigh)
470 */
471 static void
472 bsd_to_linux_stat(struct stat *bsp, struct linux_stat *lsp)
473 {
474
475 memset(lsp, 0, sizeof(*lsp));
476 lsp->lst_dev = linux_fakedev(bsp->st_dev, 0);
477 lsp->lst_ino = bsp->st_ino;
478 lsp->lst_mode = (linux_mode_t)bsp->st_mode;
479 if (bsp->st_nlink >= (1 << 15))
480 lsp->lst_nlink = (1 << 15) - 1;
481 else
482 lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink;
483 lsp->lst_uid = bsp->st_uid;
484 lsp->lst_gid = bsp->st_gid;
485 lsp->lst_rdev = linux_fakedev(bsp->st_rdev, 1);
486 lsp->lst_size = bsp->st_size;
487 lsp->lst_blksize = bsp->st_blksize;
488 lsp->lst_blocks = bsp->st_blocks;
489 lsp->lst_atime = bsp->st_atime;
490 lsp->lst_mtime = bsp->st_mtime;
491 lsp->lst_ctime = bsp->st_ctime;
492 #ifdef LINUX_STAT_HAS_NSEC
493 lsp->lst_atime_nsec = bsp->st_atimensec;
494 lsp->lst_mtime_nsec = bsp->st_mtimensec;
495 lsp->lst_ctime_nsec = bsp->st_ctimensec;
496 #endif
497 }
498
499 /*
500 * The stat functions below are plain sailing. stat and lstat are handled
501 * by one function to avoid code duplication.
502 */
503 int
504 linux_sys_fstat(struct lwp *l, const struct linux_sys_fstat_args *uap, register_t *retval)
505 {
506 /* {
507 syscallarg(int) fd;
508 syscallarg(linux_stat *) sp;
509 } */
510 struct linux_stat tmplst;
511 struct stat tmpst;
512 int error;
513
514 error = do_sys_fstat(SCARG(uap, fd), &tmpst);
515 if (error != 0)
516 return error;
517 bsd_to_linux_stat(&tmpst, &tmplst);
518
519 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst);
520 }
521
522 static int
523 linux_stat1(const struct linux_sys_stat_args *uap, register_t *retval, int flags)
524 {
525 struct linux_stat tmplst;
526 struct stat tmpst;
527 int error;
528
529 error = do_sys_stat(SCARG(uap, path), flags, &tmpst);
530 if (error != 0)
531 return error;
532
533 bsd_to_linux_stat(&tmpst, &tmplst);
534
535 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst);
536 }
537
538 int
539 linux_sys_stat(struct lwp *l, const struct linux_sys_stat_args *uap, register_t *retval)
540 {
541 /* {
542 syscallarg(const char *) path;
543 syscallarg(struct linux_stat *) sp;
544 } */
545
546 return linux_stat1(uap, retval, FOLLOW);
547 }
548
549 /* Note: this is "newlstat" in the Linux sources */
550 /* (we don't bother with the old lstat currently) */
551 int
552 linux_sys_lstat(struct lwp *l, const struct linux_sys_lstat_args *uap, register_t *retval)
553 {
554 /* {
555 syscallarg(const char *) path;
556 syscallarg(struct linux_stat *) sp;
557 } */
558
559 return linux_stat1((const void *)uap, retval, NOFOLLOW);
560 }
561 #endif /* !__aarch64__ && !__amd64__ */
562
563 /*
564 * The following syscalls are mostly here because of the alternate path check.
565 */
566
567 int
568 linux_sys_linkat(struct lwp *l, const struct linux_sys_linkat_args *uap, register_t *retval)
569 {
570 /* {
571 syscallarg(int) fd1;
572 syscallarg(const char *) name1;
573 syscallarg(int) fd2;
574 syscallarg(const char *) name2;
575 syscallarg(int) flags;
576 } */
577 int fd1 = SCARG(uap, fd1);
578 const char *name1 = SCARG(uap, name1);
579 int fd2 = SCARG(uap, fd2);
580 const char *name2 = SCARG(uap, name2);
581 int follow;
582
583 follow = SCARG(uap, flags) & LINUX_AT_SYMLINK_FOLLOW;
584
585 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
586 }
587
588 static int
589 linux_unlink_dircheck(const char *path)
590 {
591 struct nameidata nd;
592 struct pathbuf *pb;
593 int error;
594
595 /*
596 * Linux returns EISDIR if unlink(2) is called on a directory.
597 * We return EPERM in such cases. To emulate correct behaviour,
598 * check if the path points to directory and return EISDIR if this
599 * is the case.
600 *
601 * XXX this should really not copy in the path buffer twice...
602 */
603 error = pathbuf_copyin(path, &pb);
604 if (error) {
605 return error;
606 }
607 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
608 if (namei(&nd) == 0) {
609 struct stat sb;
610
611 if (vn_stat(nd.ni_vp, &sb) == 0
612 && S_ISDIR(sb.st_mode))
613 error = EISDIR;
614
615 vput(nd.ni_vp);
616 }
617 pathbuf_destroy(pb);
618 return error ? error : EPERM;
619 }
620
621 int
622 linux_sys_unlink(struct lwp *l, const struct linux_sys_unlink_args *uap, register_t *retval)
623 {
624 /* {
625 syscallarg(const char *) path;
626 } */
627 int error;
628
629 error = sys_unlink(l, (const void *)uap, retval);
630 if (error == EPERM)
631 error = linux_unlink_dircheck(SCARG(uap, path));
632
633 return error;
634 }
635
636 int
637 linux_sys_unlinkat(struct lwp *l, const struct linux_sys_unlinkat_args *uap, register_t *retval)
638 {
639 /* {
640 syscallarg(int) fd;
641 syscallarg(const char *) path;
642 syscallarg(int) flag;
643 } */
644 struct sys_unlinkat_args ua;
645 int error;
646
647 SCARG(&ua, fd) = SCARG(uap, fd);
648 SCARG(&ua, path) = SCARG(uap, path);
649 SCARG(&ua, flag) = linux_to_bsd_atflags(SCARG(uap, flag));
650
651 error = sys_unlinkat(l, &ua, retval);
652 if (error == EPERM)
653 error = linux_unlink_dircheck(SCARG(uap, path));
654
655 return error;
656 }
657
658 int
659 linux_sys_mknod(struct lwp *l, const struct linux_sys_mknod_args *uap, register_t *retval)
660 {
661 /* {
662 syscallarg(const char *) path;
663 syscallarg(linux_umode_t) mode;
664 syscallarg(unsigned) dev;
665 } */
666 struct linux_sys_mknodat_args ua;
667
668 SCARG(&ua, fd) = LINUX_AT_FDCWD;
669 SCARG(&ua, path) = SCARG(uap, path);
670 SCARG(&ua, mode) = SCARG(uap, mode);
671 SCARG(&ua, dev) = SCARG(uap, dev);
672
673 return linux_sys_mknodat(l, &ua, retval);
674 }
675
676 int
677 linux_sys_mknodat(struct lwp *l, const struct linux_sys_mknodat_args *uap, register_t *retval)
678 {
679 /* {
680 syscallarg(int) fd;
681 syscallarg(const char *) path;
682 syscallarg(linux_umode_t) mode;
683 syscallarg(unsigned) dev;
684 } */
685
686 /*
687 * BSD handles FIFOs separately
688 */
689 if (S_ISFIFO(SCARG(uap, mode))) {
690 struct sys_mkfifoat_args bma;
691
692 SCARG(&bma, fd) = SCARG(uap, fd);
693 SCARG(&bma, path) = SCARG(uap, path);
694 SCARG(&bma, mode) = SCARG(uap, mode);
695 return sys_mkfifoat(l, &bma, retval);
696 } else {
697
698 /*
699 * Linux device numbers uses 8 bits for minor and 8 bits
700 * for major. Due to how we map our major and minor,
701 * this just fits into our dev_t. Just mask off the
702 * upper 16bit to remove any random junk.
703 */
704
705 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
706 SCARG(uap, mode), SCARG(uap, dev) & 0xffff, UIO_USERSPACE);
707 }
708 }
709
710 int
711 linux_sys_fchmodat(struct lwp *l, const struct linux_sys_fchmodat_args *uap, register_t *retval)
712 {
713 /* {
714 syscallarg(int) fd;
715 syscallarg(const char *) path;
716 syscallarg(linux_umode_t) mode;
717 } */
718
719 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
720 SCARG(uap, mode), AT_SYMLINK_FOLLOW);
721 }
722
723 int
724 linux_sys_fchownat(struct lwp *l, const struct linux_sys_fchownat_args *uap, register_t *retval)
725 {
726 /* {
727 syscallarg(int) fd;
728 syscallarg(const char *) path;
729 syscallarg(uid_t) owner;
730 syscallarg(gid_t) group;
731 syscallarg(int) flag;
732 } */
733 int flag;
734
735 flag = linux_to_bsd_atflags(SCARG(uap, flag));
736 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
737 SCARG(uap, owner), SCARG(uap, group), flag);
738 }
739
740 int
741 linux_sys_faccessat(struct lwp *l, const struct linux_sys_faccessat_args *uap, register_t *retval)
742 {
743 /* {
744 syscallarg(int) fd;
745 syscallarg(const char *) path;
746 syscallarg(int) amode;
747 } */
748
749 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
750 SCARG(uap, amode), AT_SYMLINK_FOLLOW);
751 }
752
753 /*
754 * This is just fsync() for now (just as it is in the Linux kernel)
755 * Note: this is not implemented under Linux on Alpha and Arm
756 * but should still be defined in our syscalls.master.
757 * (syscall #148 on the arm)
758 */
759 int
760 linux_sys_fdatasync(struct lwp *l, const struct linux_sys_fdatasync_args *uap, register_t *retval)
761 {
762 /* {
763 syscallarg(int) fd;
764 } */
765
766 return sys_fsync(l, (const void *)uap, retval);
767 }
768
769 /*
770 * pread(2).
771 */
772 int
773 linux_sys_pread(struct lwp *l, const struct linux_sys_pread_args *uap, register_t *retval)
774 {
775 /* {
776 syscallarg(int) fd;
777 syscallarg(void *) buf;
778 syscallarg(size_t) nbyte;
779 syscallarg(off_t) offset;
780 } */
781 struct sys_pread_args pra;
782
783 SCARG(&pra, fd) = SCARG(uap, fd);
784 SCARG(&pra, buf) = SCARG(uap, buf);
785 SCARG(&pra, nbyte) = SCARG(uap, nbyte);
786 SCARG(&pra, PAD) = 0;
787 SCARG(&pra, offset) = SCARG(uap, offset);
788
789 return sys_pread(l, &pra, retval);
790 }
791
792 /*
793 * pwrite(2).
794 */
795 int
796 linux_sys_pwrite(struct lwp *l, const struct linux_sys_pwrite_args *uap, register_t *retval)
797 {
798 /* {
799 syscallarg(int) fd;
800 syscallarg(void *) buf;
801 syscallarg(size_t) nbyte;
802 syscallarg(off_t) offset;
803 } */
804 struct sys_pwrite_args pra;
805
806 SCARG(&pra, fd) = SCARG(uap, fd);
807 SCARG(&pra, buf) = SCARG(uap, buf);
808 SCARG(&pra, nbyte) = SCARG(uap, nbyte);
809 SCARG(&pra, PAD) = 0;
810 SCARG(&pra, offset) = SCARG(uap, offset);
811
812 return sys_pwrite(l, &pra, retval);
813 }
814
815 /*
816 * preadv(2)
817 */
818 int
819 linux_sys_preadv(struct lwp *l, const struct linux_sys_preadv_args *uap,
820 register_t *retval)
821 {
822 /* {
823 syscallarg(int) fd;
824 syscallarg(const struct iovec *) iovp;
825 syscallarg(int) iovcnt;
826 syscallarg(unsigned long) off_lo;
827 syscallarg(unsigned long) off_hi;
828 } */
829 struct sys_preadv_args ua;
830
831 SCARG(&ua, fd) = SCARG(uap, fd);
832 SCARG(&ua, iovp) = SCARG(uap, iovp);
833 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt);
834 SCARG(&ua, PAD) = 0;
835 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi),
836 SCARG(uap, off_lo));
837 return sys_preadv(l, &ua, retval);
838 }
839
840 /*
841 * pwritev(2)
842 */
843 int
844 linux_sys_pwritev(struct lwp *l, const struct linux_sys_pwritev_args *uap,
845 register_t *retval)
846 {
847 /* {
848 syscallarg(int) fd;
849 syscallarg(const struct iovec *) iovp;
850 syscallarg(int) iovcnt;
851 syscallarg(unsigned long) off_lo;
852 syscallarg(unsigned long) off_hi;
853 } */
854 struct sys_pwritev_args ua;
855
856 SCARG(&ua, fd) = SCARG(uap, fd);
857 SCARG(&ua, iovp) = (const void *)SCARG(uap, iovp);
858 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt);
859 SCARG(&ua, PAD) = 0;
860 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi),
861 SCARG(uap, off_lo));
862 return sys_pwritev(l, &ua, retval);
863 }
864
865 int
866 linux_sys_dup3(struct lwp *l, const struct linux_sys_dup3_args *uap,
867 register_t *retval)
868 {
869 /* {
870 syscallarg(int) from;
871 syscallarg(int) to;
872 syscallarg(int) flags;
873 } */
874 int flags;
875
876 flags = linux_to_bsd_ioflags(SCARG(uap, flags));
877 if ((flags & ~O_CLOEXEC) != 0)
878 return EINVAL;
879
880 if (SCARG(uap, from) == SCARG(uap, to))
881 return EINVAL;
882
883 return dodup(l, SCARG(uap, from), SCARG(uap, to), flags, retval);
884 }
885
886
887 int
888 linux_to_bsd_atflags(int lflags)
889 {
890 int bflags = 0;
891
892 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
893 bflags |= AT_SYMLINK_NOFOLLOW;
894 if (lflags & LINUX_AT_REMOVEDIR)
895 bflags |= AT_REMOVEDIR;
896 if (lflags & LINUX_AT_SYMLINK_FOLLOW)
897 bflags |= AT_SYMLINK_FOLLOW;
898
899 return bflags;
900 }
901
902 int
903 linux_sys_faccessat2(lwp_t *l, const struct linux_sys_faccessat2_args *uap,
904 register_t *retval)
905 {
906 /* {
907 syscallarg(int) fd;
908 syscallarg(const char *) path;
909 syscallarg(int) amode;
910 syscallarg(int) flags;
911 }*/
912 int flag = linux_to_bsd_atflags(SCARG(uap, flags));
913 int mode = SCARG(uap, amode);
914 int fd = SCARG(uap, fd);
915 const char *path = SCARG(uap, path);
916
917 return do_sys_accessat(l, fd, path, mode, flag);
918 }
919
920
921 #define LINUX_NOT_SUPPORTED(fun) \
922 int \
923 fun(struct lwp *l, const struct fun##_args *uap, register_t *retval) \
924 { \
925 return EOPNOTSUPP; \
926 }
927
928 LINUX_NOT_SUPPORTED(linux_sys_setxattr)
929 LINUX_NOT_SUPPORTED(linux_sys_lsetxattr)
930 LINUX_NOT_SUPPORTED(linux_sys_fsetxattr)
931
932 LINUX_NOT_SUPPORTED(linux_sys_getxattr)
933 LINUX_NOT_SUPPORTED(linux_sys_lgetxattr)
934 LINUX_NOT_SUPPORTED(linux_sys_fgetxattr)
935
936 LINUX_NOT_SUPPORTED(linux_sys_listxattr)
937 LINUX_NOT_SUPPORTED(linux_sys_llistxattr)
938 LINUX_NOT_SUPPORTED(linux_sys_flistxattr)
939
940 LINUX_NOT_SUPPORTED(linux_sys_removexattr)
941 LINUX_NOT_SUPPORTED(linux_sys_lremovexattr)
942 LINUX_NOT_SUPPORTED(linux_sys_fremovexattr)
943
944 /*
945 * For now just return EOPNOTSUPP, this makes glibc posix_fallocate()
946 * to fallback to emulation.
947 * XXX Right now no filesystem actually implements fallocate support,
948 * so no need for mapping.
949 */
950 LINUX_NOT_SUPPORTED(linux_sys_fallocate)
951