/*	$NetBSD: sys_descrip.c,v 1.4 2008/06/23 11:26:53 ad Exp $	*/
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
63 */
64
65 /*
66 * System calls on descriptors.
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.4 2008/06/23 11:26:53 ad Exp $");
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/kernel.h>
76 #include <sys/vnode.h>
77 #include <sys/proc.h>
78 #include <sys/file.h>
79 #include <sys/namei.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/stat.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/malloc.h>
86 #include <sys/pool.h>
87 #include <sys/syslog.h>
88 #include <sys/unistd.h>
89 #include <sys/resourcevar.h>
90 #include <sys/conf.h>
91 #include <sys/event.h>
92 #include <sys/kauth.h>
93 #include <sys/atomic.h>
94 #include <sys/mount.h>
95 #include <sys/syscallargs.h>
96
97 /*
98 * Duplicate a file descriptor.
99 */
100 int
101 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
102 {
103 /* {
104 syscallarg(int) fd;
105 } */
106 int new, error, old;
107 file_t *fp;
108
109 old = SCARG(uap, fd);
110
111 if ((fp = fd_getfile(old)) == NULL) {
112 return EBADF;
113 }
114 error = fd_dup(fp, 0, &new, 0);
115 fd_putfile(old);
116 *retval = new;
117 return error;
118 }
119
120 /*
121 * Duplicate a file descriptor to a particular value.
122 */
123 int
124 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
125 {
126 /* {
127 syscallarg(int) from;
128 syscallarg(int) to;
129 } */
130 int old, new, error;
131 file_t *fp;
132
133 old = SCARG(uap, from);
134 new = SCARG(uap, to);
135
136 if ((fp = fd_getfile(old)) == NULL) {
137 return EBADF;
138 }
139 if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
140 (u_int)new >= maxfiles) {
141 error = EBADF;
142 } else if (old == new) {
143 error = 0;
144 } else {
145 error = fd_dup2(fp, new);
146 }
147 fd_putfile(old);
148 *retval = new;
149
150 return 0;
151 }
152
153 /*
154 * fcntl call which is being passed to the file's fs.
155 */
156 static int
157 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
158 {
159 int error;
160 u_int size;
161 void *data, *memp;
162 #define STK_PARAMS 128
163 char stkbuf[STK_PARAMS];
164
165 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
166 return (EBADF);
167
168 /*
169 * Interpret high order word to find amount of data to be
170 * copied to/from the user's address space.
171 */
172 size = (size_t)F_PARAM_LEN(cmd);
173 if (size > F_PARAM_MAX)
174 return (EINVAL);
175 memp = NULL;
176 if (size > sizeof(stkbuf)) {
177 memp = kmem_alloc(size, KM_SLEEP);
178 data = memp;
179 } else
180 data = stkbuf;
181 if (cmd & F_FSIN) {
182 if (size) {
183 error = copyin(arg, data, size);
184 if (error) {
185 if (memp)
186 kmem_free(memp, size);
187 return (error);
188 }
189 } else
190 *(void **)data = arg;
191 } else if ((cmd & F_FSOUT) != 0 && size != 0) {
192 /*
193 * Zero the buffer so the user always
194 * gets back something deterministic.
195 */
196 memset(data, 0, size);
197 } else if (cmd & F_FSVOID)
198 *(void **)data = arg;
199
200
201 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
202
203 /*
204 * Copy any data to user, size was
205 * already set and checked above.
206 */
207 if (error == 0 && (cmd & F_FSOUT) && size)
208 error = copyout(data, arg, size);
209 if (memp)
210 kmem_free(memp, size);
211 return (error);
212 }
213
214 int
215 do_fcntl_lock(int fd, int cmd, struct flock *fl)
216 {
217 file_t *fp;
218 vnode_t *vp;
219 proc_t *p;
220 int error, flg;
221
222 if ((fp = fd_getfile(fd)) == NULL)
223 return EBADF;
224 if (fp->f_type != DTYPE_VNODE) {
225 fd_putfile(fd);
226 return EINVAL;
227 }
228 vp = fp->f_data;
229 if (fl->l_whence == SEEK_CUR)
230 fl->l_start += fp->f_offset;
231
232 flg = F_POSIX;
233 p = curproc;
234
235 switch (cmd) {
236 case F_SETLKW:
237 flg |= F_WAIT;
238 /* Fall into F_SETLK */
239
240 case F_SETLK:
241 switch (fl->l_type) {
242 case F_RDLCK:
243 if ((fp->f_flag & FREAD) == 0) {
244 error = EBADF;
245 break;
246 }
247 if ((p->p_flag & PK_ADVLOCK) == 0) {
248 mutex_enter(p->p_lock);
249 p->p_flag |= PK_ADVLOCK;
250 mutex_exit(p->p_lock);
251 }
252 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
253 break;
254
255 case F_WRLCK:
256 if ((fp->f_flag & FWRITE) == 0) {
257 error = EBADF;
258 break;
259 }
260 if ((p->p_flag & PK_ADVLOCK) == 0) {
261 mutex_enter(p->p_lock);
262 p->p_flag |= PK_ADVLOCK;
263 mutex_exit(p->p_lock);
264 }
265 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
266 break;
267
268 case F_UNLCK:
269 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
270 break;
271
272 default:
273 error = EINVAL;
274 break;
275 }
276 break;
277
278 case F_GETLK:
279 if (fl->l_type != F_RDLCK &&
280 fl->l_type != F_WRLCK &&
281 fl->l_type != F_UNLCK) {
282 error = EINVAL;
283 break;
284 }
285 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
286 break;
287
288 default:
289 error = EINVAL;
290 break;
291 }
292
293 fd_putfile(fd);
294 return error;
295 }
296
297 /*
298 * The file control system call.
299 */
300 int
301 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
302 {
303 /* {
304 syscallarg(int) fd;
305 syscallarg(int) cmd;
306 syscallarg(void *) arg;
307 } */
308 int fd, i, tmp, error, cmd, newmin;
309 filedesc_t *fdp;
310 file_t *fp;
311 fdfile_t *ff;
312 struct flock fl;
313
314 fd = SCARG(uap, fd);
315 cmd = SCARG(uap, cmd);
316 fdp = l->l_fd;
317 error = 0;
318
319 switch (cmd) {
320 case F_CLOSEM:
321 if (fd < 0)
322 return EBADF;
323 while ((i = fdp->fd_lastfile) >= fd) {
324 if (fd_getfile(i) == NULL) {
325 /* Another thread has updated. */
326 continue;
327 }
328 fd_close(i);
329 }
330 return 0;
331
332 case F_MAXFD:
333 *retval = fdp->fd_lastfile;
334 return 0;
335
336 case F_SETLKW:
337 case F_SETLK:
338 case F_GETLK:
339 error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
340 if (error)
341 return error;
342 error = do_fcntl_lock(fd, cmd, &fl);
343 if (cmd == F_GETLK && error == 0)
344 error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
345 return error;
346
347 default:
348 /* Handled below */
349 break;
350 }
351
352 if ((fp = fd_getfile(fd)) == NULL)
353 return (EBADF);
354 ff = fdp->fd_ofiles[fd];
355
356 if ((cmd & F_FSCTL)) {
357 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
358 fd_putfile(fd);
359 return error;
360 }
361
362 switch (cmd) {
363 case F_DUPFD:
364 newmin = (long)SCARG(uap, arg);
365 if ((u_int)newmin >=
366 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
367 (u_int)newmin >= maxfiles) {
368 fd_putfile(fd);
369 return EINVAL;
370 }
371 error = fd_dup(fp, newmin, &i, 0);
372 *retval = i;
373 break;
374
375 case F_GETFD:
376 *retval = ff->ff_exclose;
377 break;
378
379 case F_SETFD:
380 if ((long)SCARG(uap, arg) & 1) {
381 ff->ff_exclose = 1;
382 fdp->fd_exclose = 1;
383 } else {
384 ff->ff_exclose = 0;
385 }
386 break;
387
388 case F_GETFL:
389 *retval = OFLAGS(fp->f_flag);
390 break;
391
392 case F_SETFL:
393 /* XXX not guaranteed to be atomic. */
394 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
395 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
396 if (error)
397 break;
398 i = tmp ^ fp->f_flag;
399 if (i & FNONBLOCK) {
400 int flgs = tmp & FNONBLOCK;
401 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
402 if (error) {
403 (*fp->f_ops->fo_fcntl)(fp, F_SETFL,
404 &fp->f_flag);
405 break;
406 }
407 }
408 if (i & FASYNC) {
409 int flgs = tmp & FASYNC;
410 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
411 if (error) {
412 if (i & FNONBLOCK) {
413 tmp = fp->f_flag & FNONBLOCK;
414 (void)(*fp->f_ops->fo_ioctl)(fp,
415 FIONBIO, &tmp);
416 }
417 (*fp->f_ops->fo_fcntl)(fp, F_SETFL,
418 &fp->f_flag);
419 break;
420 }
421 }
422 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
423 break;
424
425 case F_GETOWN:
426 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
427 *retval = tmp;
428 break;
429
430 case F_SETOWN:
431 tmp = (int)(intptr_t) SCARG(uap, arg);
432 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
433 break;
434
435 default:
436 error = EINVAL;
437 }
438
439 fd_putfile(fd);
440 return (error);
441 }
442
443 /*
444 * Close a file descriptor.
445 */
446 int
447 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
448 {
449 /* {
450 syscallarg(int) fd;
451 } */
452
453 if (fd_getfile(SCARG(uap, fd)) == NULL) {
454 return EBADF;
455 }
456 return fd_close(SCARG(uap, fd));
457 }
458
459 /*
460 * Return status information about a file descriptor.
461 * Common function for compat code.
462 */
463 int
464 do_sys_fstat(int fd, struct stat *sb)
465 {
466 file_t *fp;
467 int error;
468
469 if ((fp = fd_getfile(fd)) == NULL) {
470 return EBADF;
471 }
472 error = (*fp->f_ops->fo_stat)(fp, sb);
473 fd_putfile(fd);
474
475 return error;
476 }
477
478 /*
479 * Return status information about a file descriptor.
480 */
481 int
482 sys___fstat30(struct lwp *l, const struct sys___fstat30_args *uap,
483 register_t *retval)
484 {
485 /* {
486 syscallarg(int) fd;
487 syscallarg(struct stat *) sb;
488 } */
489 struct stat sb;
490 int error;
491
492 error = do_sys_fstat(SCARG(uap, fd), &sb);
493 if (error == 0) {
494 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
495 }
496 return error;
497 }
498
499 /*
500 * Return pathconf information about a file descriptor.
501 */
502 int
503 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
504 register_t *retval)
505 {
506 /* {
507 syscallarg(int) fd;
508 syscallarg(int) name;
509 } */
510 int fd, error;
511 file_t *fp;
512
513 fd = SCARG(uap, fd);
514 error = 0;
515
516 if ((fp = fd_getfile(fd)) == NULL) {
517 return (EBADF);
518 }
519 switch (fp->f_type) {
520 case DTYPE_SOCKET:
521 case DTYPE_PIPE:
522 if (SCARG(uap, name) != _PC_PIPE_BUF)
523 error = EINVAL;
524 else
525 *retval = PIPE_BUF;
526 break;
527
528 case DTYPE_VNODE:
529 error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval);
530 break;
531
532 case DTYPE_KQUEUE:
533 error = EINVAL;
534 break;
535
536 default:
537 error = EOPNOTSUPP;
538 break;
539 }
540
541 fd_putfile(fd);
542 return (error);
543 }
544
545 /*
546 * Apply an advisory lock on a file descriptor.
547 *
548 * Just attempt to get a record lock of the requested type on
549 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
550 */
551 /* ARGSUSED */
552 int
553 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
554 {
555 /* {
556 syscallarg(int) fd;
557 syscallarg(int) how;
558 } */
559 int fd, how, error;
560 file_t *fp;
561 vnode_t *vp;
562 struct flock lf;
563 proc_t *p;
564
565 fd = SCARG(uap, fd);
566 how = SCARG(uap, how);
567 error = 0;
568
569 if ((fp = fd_getfile(fd)) == NULL) {
570 return EBADF;
571 }
572 if (fp->f_type != DTYPE_VNODE) {
573 fd_putfile(fd);
574 return EOPNOTSUPP;
575 }
576
577 vp = fp->f_data;
578 lf.l_whence = SEEK_SET;
579 lf.l_start = 0;
580 lf.l_len = 0;
581 if (how & LOCK_UN) {
582 lf.l_type = F_UNLCK;
583 atomic_and_uint(&fp->f_flag, ~FHASLOCK);
584 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
585 fd_putfile(fd);
586 return error;
587 }
588 if (how & LOCK_EX) {
589 lf.l_type = F_WRLCK;
590 } else if (how & LOCK_SH) {
591 lf.l_type = F_RDLCK;
592 } else {
593 fd_putfile(fd);
594 return EINVAL;
595 }
596 atomic_or_uint(&fp->f_flag, FHASLOCK);
597 p = curproc;
598 if (how & LOCK_NB) {
599 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
600 } else {
601 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
602 }
603 fd_putfile(fd);
604 return error;
605 }
606
607 int
608 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
609 {
610 file_t *fp;
611 int error;
612
613 if ((fp = fd_getfile(fd)) == NULL) {
614 return EBADF;
615 }
616 if (fp->f_type != DTYPE_VNODE) {
617 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
618 error = ESPIPE;
619 } else {
620 error = EOPNOTSUPP;
621 }
622 fd_putfile(fd);
623 return error;
624 }
625
626 switch (advice) {
627 case POSIX_FADV_NORMAL:
628 case POSIX_FADV_RANDOM:
629 case POSIX_FADV_SEQUENTIAL:
630 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
631 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
632 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
633
634 /*
635 * We ignore offset and size. must lock the file to
636 * do this, as f_advice is sub-word sized.
637 */
638 mutex_enter(&fp->f_lock);
639 fp->f_advice = (u_char)advice;
640 mutex_exit(&fp->f_lock);
641 error = 0;
642 break;
643
644 case POSIX_FADV_WILLNEED:
645 case POSIX_FADV_DONTNEED:
646 case POSIX_FADV_NOREUSE:
647 /* Not implemented yet. */
648 error = 0;
649 break;
650 default:
651 error = EINVAL;
652 break;
653 }
654
655 fd_putfile(fd);
656 return error;
657 }
658
659 int
660 sys___posix_fadvise50(struct lwp *l,
661 const struct sys___posix_fadvise50_args *uap,
662 register_t *retval)
663 {
664 /* {
665 syscallarg(int) fd;
666 syscallarg(int) pad;
667 syscallarg(off_t) offset;
668 syscallarg(off_t) len;
669 syscallarg(int) advice;
670 } */
671
672 return do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset),
673 SCARG(uap, len), SCARG(uap, advice));
674 }
675