sys_descrip.c revision 1.43 1 /* $NetBSD: sys_descrip.c,v 1.43 2023/04/22 13:53:02 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
63 */
64
65 /*
66 * System calls on descriptors.
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.43 2023/04/22 13:53:02 riastradh Exp $");
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/kernel.h>
76 #include <sys/vnode.h>
77 #include <sys/proc.h>
78 #include <sys/file.h>
79 #include <sys/namei.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/stat.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/kmem.h>
86 #include <sys/pool.h>
87 #include <sys/syslog.h>
88 #include <sys/unistd.h>
89 #include <sys/resourcevar.h>
90 #include <sys/conf.h>
91 #include <sys/event.h>
92 #include <sys/kauth.h>
93 #include <sys/atomic.h>
94 #include <sys/mount.h>
95 #include <sys/syscallargs.h>
96
97 #include <uvm/uvm_readahead.h>
98
99 /*
100 * Duplicate a file descriptor.
101 */
102 int
103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
104 {
105 /* {
106 syscallarg(int) fd;
107 } */
108 int error, newfd, oldfd;
109 file_t *fp;
110
111 oldfd = SCARG(uap, fd);
112
113 if ((fp = fd_getfile(oldfd)) == NULL) {
114 return EBADF;
115 }
116 error = fd_dup(fp, 0, &newfd, false);
117 fd_putfile(oldfd);
118 *retval = newfd;
119 return error;
120 }
121
122 /*
123 * Duplicate a file descriptor to a particular value.
124 */
125 int
126 dodup(struct lwp *l, int from, int to, int flags, register_t *retval)
127 {
128 int error;
129 file_t *fp;
130
131 if ((fp = fd_getfile(from)) == NULL)
132 return EBADF;
133 mutex_enter(&fp->f_lock);
134 fp->f_count++;
135 mutex_exit(&fp->f_lock);
136 fd_putfile(from);
137
138 if ((u_int)to >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
139 (u_int)to >= maxfiles)
140 error = EBADF;
141 else if (from == to)
142 error = 0;
143 else
144 error = fd_dup2(fp, to, flags);
145 closef(fp);
146 *retval = to;
147
148 return error;
149 }
150
151 int
152 sys_dup3(struct lwp *l, const struct sys_dup3_args *uap, register_t *retval)
153 {
154 /* {
155 syscallarg(int) from;
156 syscallarg(int) to;
157 syscallarg(int) flags;
158 } */
159 return dodup(l, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags),
160 retval);
161 }
162
163 int
164 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
165 {
166 /* {
167 syscallarg(int) from;
168 syscallarg(int) to;
169 } */
170 return dodup(l, SCARG(uap, from), SCARG(uap, to), 0, retval);
171 }
172
173 /*
174 * fcntl call which is being passed to the file's fs.
175 */
176 static int
177 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
178 {
179 int error;
180 u_int size;
181 void *data, *memp;
182 #define STK_PARAMS 128
183 char stkbuf[STK_PARAMS];
184
185 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
186 return (EBADF);
187
188 /*
189 * Interpret high order word to find amount of data to be
190 * copied to/from the user's address space.
191 */
192 size = (size_t)F_PARAM_LEN(cmd);
193 if (size > F_PARAM_MAX)
194 return (EINVAL);
195 memp = NULL;
196 if (size > sizeof(stkbuf)) {
197 memp = kmem_alloc(size, KM_SLEEP);
198 data = memp;
199 } else
200 data = stkbuf;
201 if (cmd & F_FSIN) {
202 if (size) {
203 error = copyin(arg, data, size);
204 if (error) {
205 if (memp)
206 kmem_free(memp, size);
207 return (error);
208 }
209 } else
210 *(void **)data = arg;
211 } else if ((cmd & F_FSOUT) != 0 && size != 0) {
212 /*
213 * Zero the buffer so the user always
214 * gets back something deterministic.
215 */
216 memset(data, 0, size);
217 } else if (cmd & F_FSVOID)
218 *(void **)data = arg;
219
220
221 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
222
223 /*
224 * Copy any data to user, size was
225 * already set and checked above.
226 */
227 if (error == 0 && (cmd & F_FSOUT) && size)
228 error = copyout(data, arg, size);
229 if (memp)
230 kmem_free(memp, size);
231 return (error);
232 }
233
234 int
235 do_fcntl_lock(int fd, int cmd, struct flock *fl)
236 {
237 file_t *fp;
238 proc_t *p;
239 int (*fo_advlock)(struct file *, void *, int, struct flock *, int);
240 int error, flg;
241
242 if ((fp = fd_getfile(fd)) == NULL)
243 return EBADF;
244 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL)
245 return EINVAL;
246
247 flg = F_POSIX;
248 p = curproc;
249
250 switch (cmd) {
251 case F_SETLKW:
252 flg |= F_WAIT;
253 /* Fall into F_SETLK */
254
255 /* FALLTHROUGH */
256 case F_SETLK:
257 switch (fl->l_type) {
258 case F_RDLCK:
259 if ((fp->f_flag & FREAD) == 0) {
260 error = EBADF;
261 break;
262 }
263 if ((p->p_flag & PK_ADVLOCK) == 0) {
264 mutex_enter(p->p_lock);
265 p->p_flag |= PK_ADVLOCK;
266 mutex_exit(p->p_lock);
267 }
268 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg);
269 break;
270
271 case F_WRLCK:
272 if ((fp->f_flag & FWRITE) == 0) {
273 error = EBADF;
274 break;
275 }
276 if ((p->p_flag & PK_ADVLOCK) == 0) {
277 mutex_enter(p->p_lock);
278 p->p_flag |= PK_ADVLOCK;
279 mutex_exit(p->p_lock);
280 }
281 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg);
282 break;
283
284 case F_UNLCK:
285 error = (*fo_advlock)(fp, p, F_UNLCK, fl, F_POSIX);
286 break;
287
288 default:
289 error = EINVAL;
290 break;
291 }
292 break;
293
294 case F_GETLK:
295 if (fl->l_type != F_RDLCK &&
296 fl->l_type != F_WRLCK &&
297 fl->l_type != F_UNLCK) {
298 error = EINVAL;
299 break;
300 }
301 error = (*fo_advlock)(fp, p, F_GETLK, fl, F_POSIX);
302 break;
303
304 default:
305 error = EINVAL;
306 break;
307 }
308
309 fd_putfile(fd);
310 return error;
311 }
312
313 static int
314 do_fcntl_getpath(struct lwp *l, file_t *fp, char *upath)
315 {
316 char *kpath;
317 int error;
318
319 if (fp->f_type != DTYPE_VNODE)
320 return EOPNOTSUPP;
321
322 kpath = PNBUF_GET();
323
324 error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, l, l->l_proc);
325 if (!error)
326 error = copyoutstr(kpath, upath, MAXPATHLEN, NULL);
327
328 PNBUF_PUT(kpath);
329
330 return error;
331 }
332
333 /*
334 * The file control system call.
335 */
336 int
337 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
338 {
339 /* {
340 syscallarg(int) fd;
341 syscallarg(int) cmd;
342 syscallarg(void *) arg;
343 } */
344 int fd, i, tmp, error, cmd, newmin;
345 filedesc_t *fdp;
346 fdtab_t *dt;
347 file_t *fp;
348 struct flock fl;
349 bool cloexec = false;
350
351 fd = SCARG(uap, fd);
352 cmd = SCARG(uap, cmd);
353 fdp = l->l_fd;
354 error = 0;
355
356 switch (cmd) {
357 case F_CLOSEM:
358 if (fd < 0)
359 return EBADF;
360 while ((i = fdp->fd_lastfile) >= fd) {
361 if (fd_getfile(i) == NULL) {
362 /* Another thread has updated. */
363 continue;
364 }
365 fd_close(i);
366 }
367 return 0;
368
369 case F_MAXFD:
370 *retval = fdp->fd_lastfile;
371 return 0;
372
373 case F_SETLKW:
374 case F_SETLK:
375 case F_GETLK:
376 error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
377 if (error)
378 return error;
379 error = do_fcntl_lock(fd, cmd, &fl);
380 if (cmd == F_GETLK && error == 0)
381 error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
382 return error;
383
384 default:
385 /* Handled below */
386 break;
387 }
388
389 if ((fp = fd_getfile(fd)) == NULL)
390 return EBADF;
391
392 if ((cmd & F_FSCTL)) {
393 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
394 fd_putfile(fd);
395 return error;
396 }
397
398 switch (cmd) {
399 case F_DUPFD_CLOEXEC:
400 cloexec = true;
401 /*FALLTHROUGH*/
402 case F_DUPFD:
403 newmin = (long)SCARG(uap, arg);
404 if ((u_int)newmin >=
405 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
406 (u_int)newmin >= maxfiles) {
407 fd_putfile(fd);
408 return EINVAL;
409 }
410 error = fd_dup(fp, newmin, &i, cloexec);
411 *retval = i;
412 break;
413
414 case F_GETFD:
415 dt = atomic_load_consume(&fdp->fd_dt);
416 *retval = dt->dt_ff[fd]->ff_exclose;
417 break;
418
419 case F_SETFD:
420 fd_set_exclose(l, fd,
421 ((long)SCARG(uap, arg) & FD_CLOEXEC) != 0);
422 break;
423
424 case F_GETNOSIGPIPE:
425 *retval = (fp->f_flag & FNOSIGPIPE) != 0;
426 break;
427
428 case F_SETNOSIGPIPE:
429 if (SCARG(uap, arg))
430 atomic_or_uint(&fp->f_flag, FNOSIGPIPE);
431 else
432 atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE);
433 *retval = 0;
434 break;
435
436 case F_GETFL:
437 *retval = OFLAGS(fp->f_flag);
438 break;
439
440 case F_SETFL:
441 /* XXX not guaranteed to be atomic. */
442 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
443 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
444 if (error)
445 break;
446 i = tmp ^ fp->f_flag;
447 if (i & FNONBLOCK) {
448 int flgs = tmp & FNONBLOCK;
449 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
450 if (error) {
451 (*fp->f_ops->fo_fcntl)(fp, F_SETFL,
452 &fp->f_flag);
453 break;
454 }
455 }
456 if (i & FASYNC) {
457 int flgs = tmp & FASYNC;
458 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
459 if (error) {
460 if (i & FNONBLOCK) {
461 tmp = fp->f_flag & FNONBLOCK;
462 (void)(*fp->f_ops->fo_ioctl)(fp,
463 FIONBIO, &tmp);
464 }
465 (*fp->f_ops->fo_fcntl)(fp, F_SETFL,
466 &fp->f_flag);
467 break;
468 }
469 }
470 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
471 break;
472
473 case F_GETOWN:
474 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
475 *retval = tmp;
476 break;
477
478 case F_SETOWN:
479 tmp = (int)(uintptr_t) SCARG(uap, arg);
480 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
481 break;
482
483 case F_GETPATH:
484 error = do_fcntl_getpath(l, fp, SCARG(uap, arg));
485 break;
486
487 default:
488 error = EINVAL;
489 }
490
491 fd_putfile(fd);
492 return (error);
493 }
494
495 /*
496 * Close a file descriptor.
497 */
498 int
499 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
500 {
501 /* {
502 syscallarg(int) fd;
503 } */
504 int error;
505 int fd = SCARG(uap, fd);
506
507 if (fd_getfile(fd) == NULL) {
508 return EBADF;
509 }
510
511 error = fd_close(fd);
512 if (error == ERESTART) {
513 #ifdef DIAGNOSTIC
514 printf("%s[%d]: close(%d) returned ERESTART\n",
515 l->l_proc->p_comm, (int)l->l_proc->p_pid, fd);
516 #endif
517 error = EINTR;
518 }
519
520 return error;
521 }
522
523 /*
524 * Return status information about a file descriptor.
525 * Common function for compat code.
526 */
527 int
528 do_sys_fstat(int fd, struct stat *sb)
529 {
530 file_t *fp;
531 int error;
532
533 if ((fp = fd_getfile(fd)) == NULL) {
534 return EBADF;
535 }
536 error = (*fp->f_ops->fo_stat)(fp, sb);
537 fd_putfile(fd);
538
539 return error;
540 }
541
542 /*
543 * Return status information about a file descriptor.
544 */
545 int
546 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap,
547 register_t *retval)
548 {
549 /* {
550 syscallarg(int) fd;
551 syscallarg(struct stat *) sb;
552 } */
553 struct stat sb;
554 int error;
555
556 error = do_sys_fstat(SCARG(uap, fd), &sb);
557 if (error == 0) {
558 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
559 }
560 return error;
561 }
562
563 /*
564 * Return pathconf information about a file descriptor.
565 */
566 int
567 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
568 register_t *retval)
569 {
570 /* {
571 syscallarg(int) fd;
572 syscallarg(int) name;
573 } */
574 int fd, name, error;
575 file_t *fp;
576
577 fd = SCARG(uap, fd);
578 name = SCARG(uap, name);
579 error = 0;
580
581 if ((fp = fd_getfile(fd)) == NULL)
582 return EBADF;
583 if (fp->f_ops->fo_fpathconf == NULL)
584 error = EOPNOTSUPP;
585 else
586 error = (*fp->f_ops->fo_fpathconf)(fp, name, retval);
587 fd_putfile(fd);
588 return error;
589 }
590
591 /*
592 * Apply an advisory lock on a file descriptor.
593 *
594 * Just attempt to get a record lock of the requested type on
595 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
596 */
597 /* ARGSUSED */
598 int
599 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
600 {
601 /* {
602 syscallarg(int) fd;
603 syscallarg(int) how;
604 } */
605 int fd, how, error;
606 file_t *fp;
607 int (*fo_advlock)(struct file *, void *, int, struct flock *, int);
608 struct flock lf;
609
610 fd = SCARG(uap, fd);
611 how = SCARG(uap, how);
612
613 if ((fp = fd_getfile(fd)) == NULL)
614 return EBADF;
615 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL)
616 return EOPNOTSUPP;
617
618 lf.l_whence = SEEK_SET;
619 lf.l_start = 0;
620 lf.l_len = 0;
621
622 switch (how & ~LOCK_NB) {
623 case LOCK_UN:
624 lf.l_type = F_UNLCK;
625 atomic_and_uint(&fp->f_flag, ~FHASLOCK);
626 error = (*fo_advlock)(fp, fp, F_UNLCK, &lf, F_FLOCK);
627 fd_putfile(fd);
628 return error;
629 case LOCK_EX:
630 lf.l_type = F_WRLCK;
631 break;
632 case LOCK_SH:
633 lf.l_type = F_RDLCK;
634 break;
635 default:
636 fd_putfile(fd);
637 return EINVAL;
638 }
639
640 atomic_or_uint(&fp->f_flag, FHASLOCK);
641 if (how & LOCK_NB) {
642 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK);
643 } else {
644 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
645 }
646 fd_putfile(fd);
647 return error;
648 }
649
650 int
651 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
652 {
653 file_t *fp;
654 int error;
655
656 if ((fp = fd_getfile(fd)) == NULL)
657 return EBADF;
658 if (fp->f_ops->fo_posix_fadvise == NULL) {
659 error = EOPNOTSUPP;
660 } else {
661 error = (*fp->f_ops->fo_posix_fadvise)(fp, offset, len,
662 advice);
663 }
664 fd_putfile(fd);
665 return error;
666 }
667
/*
 * posix_fadvise(2) system call entry point.
 *
 * The error number from do_posix_fadvise() is delivered to the caller
 * as the call's return value (*retval); the syscall itself always
 * reports success.
 */
int
sys___posix_fadvise50(struct lwp *l,
    const struct sys___posix_fadvise50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)		fd;
		syscallarg(int)		pad;
		syscallarg(off_t)	offset;
		syscallarg(off_t)	len;
		syscallarg(int)		advice;
	} */
	const int fd = SCARG(uap, fd);
	const off_t offset = SCARG(uap, offset);
	const off_t len = SCARG(uap, len);
	const int advice = SCARG(uap, advice);

	*retval = do_posix_fadvise(fd, offset, len, advice);

	return 0;
}
686
687 int
688 sys_pipe(struct lwp *l, const void *v, register_t *retval)
689 {
690 int fd[2], error;
691
692 if ((error = pipe1(l, fd, 0)) != 0)
693 return error;
694
695 retval[0] = fd[0];
696 retval[1] = fd[1];
697
698 return 0;
699 }
700
701 int
702 sys_pipe2(struct lwp *l, const struct sys_pipe2_args *uap, register_t *retval)
703 {
704 /* {
705 syscallarg(int[2]) fildes;
706 syscallarg(int) flags;
707 } */
708 int fd[2], error;
709
710 if ((error = pipe1(l, fd, SCARG(uap, flags))) != 0)
711 return error;
712
713 if ((error = copyout(fd, SCARG(uap, fildes), sizeof(fd))) != 0)
714 return error;
715 retval[0] = 0;
716 return 0;
717 }
718