Home | History | Annotate | Line # | Download | only in sys
      1 /*	$NetBSD: filedesc.h,v 1.73 2025/07/16 19:14:14 kre Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Copyright (c) 1990, 1993
     31  *	The Regents of the University of California.  All rights reserved.
     32  *
     33  * Redistribution and use in source and binary forms, with or without
     34  * modification, are permitted provided that the following conditions
     35  * are met:
     36  * 1. Redistributions of source code must retain the above copyright
     37  *    notice, this list of conditions and the following disclaimer.
     38  * 2. Redistributions in binary form must reproduce the above copyright
     39  *    notice, this list of conditions and the following disclaimer in the
     40  *    documentation and/or other materials provided with the distribution.
     41  * 3. Neither the name of the University nor the names of its contributors
     42  *    may be used to endorse or promote products derived from this software
     43  *    without specific prior written permission.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  *
     57  *	@(#)filedesc.h	8.1 (Berkeley) 6/2/93
     58  */
     59 
     60 #ifndef _SYS_FILEDESC_H_
     61 #define	_SYS_FILEDESC_H_
     62 
     63 #include <sys/param.h>
     64 #include <sys/queue.h>
     65 #include <sys/mutex.h>
     66 #include <sys/rwlock.h>
     67 #include <sys/condvar.h>
     68 
     69 /*
     70  * This structure is used for the management of descriptors.  It may be
     71  * shared by multiple processes.
     72  *
     73  * A process is initially started out with NDFILE descriptors stored within
     74  * this structure, selected to be enough for typical applications based on
     75  * the historical limit of 20 open files (and the usage of descriptors by
     76  * shells).  If these descriptors are exhausted, a larger descriptor table
     77  * may be allocated, up to a process' resource limit; the internal arrays
     78  * are then unused.  The initial expansion is set to NDEXTENT; each time
     79  * it runs out, it is doubled until the resource limit is reached. NDEXTENT
     80  * should be selected to be the biggest multiple of OFILESIZE that will
     81  * fit in a power-of-two sized piece of memory.  (OFILESIZE is not
         * defined in this header; the name appears to be historical.)
     82  */
     83 #define	NDFILE		20		/* entries declared in fdtab_t::dt_ff[] */
     84 #define	NDEXTENT	50		/* 250 bytes in 256-byte alloc */
     85 #define	NDENTRIES	32		/* 32 fds per entry */
     86 #define	NDENTRYMASK	(NDENTRIES - 1)	/* low NDENTRYSHIFT bits set */
     87 #define	NDENTRYSHIFT	5		/* bits per entry: log2(NDENTRIES) */
        /* ceil(x / NDENTRIES): count of NDENTRIES-bit words covering x bits. */
     88 #define	NDLOSLOTS(x)	(((x) + NDENTRIES - 1) >> NDENTRYSHIFT)
        /* ceil(NDLOSLOTS(x) / NDENTRIES): the same rounding, one level up. */
     89 #define	NDHISLOTS(x)	((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT)
        /* Number of fdfile_t records built into filedesc_t::fd_dfdfile[]. */
     90 #define	NDFDFILE	6		/* first 6 descriptors are free */
     91 
     92 /*
     93  * Process-private descriptor reference, one for each descriptor slot
     94  * in use.  Locks:
     95  *
     96  * :	unlocked
     97  * a	atomic operations + filedesc_t::fd_lock in some cases
     98  * d	filedesc_t::fd_lock
     99  *
    100  * Note that ff_exclose and ff_allocated are likely to be byte sized
    101  * (bool).  In general adjacent sub-word sized fields must be locked
    102  * the same way, but in this case it's ok: ff_exclose can only be
    103  * modified while the descriptor slot is live, and ff_allocated when
    104  * it's invalid.
    105  *
    106  * NOTE: ff_exclose should generally be set with fd_set_exclose(), not
    107  * written to directly, when implementing flags like O_CLOEXEC or
    108  * SOCK_CLOEXEC, so that struct filedesc::fd_exclose is updated as
    109  * needed.  See PR kern/58822: close-on-exec is broken for dup3 and
    110  * opening cloning devices (fixed).
    111  * Same with fd_set_foclose() for O_CLOFORK, SOCK_CLOFORK, etc.
    112  */
    113 typedef struct fdfile {
        	/*
        	 * The character before the colon in each field comment is a key
        	 * from the lock legend preceding this struct (":", "a" or "d").
        	 *
        	 * NOTE(review): ff_foclose is a third adjacent bool, but the note
        	 * preceding this struct only discusses ff_exclose and
        	 * ff_allocated.  Presumably ff_foclose follows the same rule as
        	 * ff_exclose -- confirm.
        	 */
    114 	bool		ff_exclose;	/* :: close on exec (fd_set_exclose) */
    115 	bool		ff_foclose;	/* :: close on fork (fd_set_foclose) */
    116 	bool		ff_allocated;	/* d: descriptor slot is allocated */
        	/* NOTE(review): presumably split by FR_CLOSING/FR_MASK -- confirm. */
    117 	u_int		ff_refcnt;	/* a: reference count on structure */
    118 	struct file	*ff_file;	/* d: pointer to file if open */
    119 	SLIST_HEAD(,knote) ff_knlist;	/* d: knotes attached to this fd */
    120 	kcondvar_t	ff_closing;	/* d: notifier for close */
    121 } fdfile_t;
    122 
    123 #define FDFILE_SIZE ((sizeof(fdfile_t)+CACHE_LINE_SIZE-1)/CACHE_LINE_SIZE*CACHE_LINE_SIZE)
        /*
         * FDFILE_SIZE is sizeof(fdfile_t) rounded up to a whole multiple of
         * CACHE_LINE_SIZE (integer divide, then multiply).  filedesc_t uses
         * it as the per-record size of its built-in fd_dfdfile[] storage.
         */
    124 
    125 /*
         * Reference count: bit 31 (FR_CLOSING) is a flag, and FR_MASK is its
         * complement, selecting the remaining bits as the count.
         * NOTE(review): presumably applied to fdfile_t::ff_refcnt -- confirm
         * against the users of these macros.
         */
    126 #define	FR_CLOSING	(0x80000000)	/* closing: must interlock */
    127 #define	FR_MASK		(~FR_CLOSING)	/* reference count */
    128 
    129 /*
    130  * Open file table, potentially many 'active' tables per filedesc_t
    131  * in a multi-threaded process, or with a shared filedesc_t (clone()).
    132  * nfiles is first to avoid pointer arithmetic.
    133  */
    134 typedef struct fdtab {
    135 	u_int		dt_nfiles;	/* number of open files allocated */
    136 	struct fdtab	*dt_link;	/* for lists of dtab */
        	/*
        	 * Declared with NDFILE entries, which is what the copy embedded
        	 * in filedesc_t (fd_dtbuiltin) provides.
        	 * NOTE(review): larger tables are presumably allocated with room
        	 * for dt_nfiles entries -- confirm against the allocator.
        	 */
    137 	fdfile_t	*dt_ff[NDFILE];	/* file structures for open fds */
    138 } fdtab_t;
    139 
    140 typedef struct filedesc {
    141 	/*
    142 	 * Built-in fdfile_t records first, since they have strict
    143 	 * alignment requirements.
        	 * Stored as raw bytes: NDFDFILE records of FDFILE_SIZE bytes each.
    144 	 */
    145 	uint8_t		fd_dfdfile[NDFDFILE][FDFILE_SIZE];
    146 	/*
    147 	 * All of the remaining fields are locked by fd_lock.
    148 	 */
    149 	kmutex_t	fd_lock;	/* lock on structure */
    150 	fdtab_t * volatile fd_dt;	/* active descriptor table */
    151 	uint32_t	*fd_himap;	/* each bit points to 32 fds */
    152 	uint32_t	*fd_lomap;	/* bitmap of free fds */
    153 	struct klist	*fd_knhash;	/* hash of attached non-fd knotes */
    154 	int		fd_lastkqfile;	/* max descriptor for kqueue */
        	/* NOTE(review): no field named fd_ofiles exists in this struct. */
    155 	int		fd_lastfile;	/* high-water mark of fd_ofiles */
    156 	int		fd_refcnt;	/* reference count */
    157 	u_long		fd_knhashmask;	/* size of fd_knhash */
    158 	int		fd_freefile;	/* approx. next free file */
    159 	int		fd_unused;	/* unused */
    160 	bool		fd_exclose;	/* non-zero if >0 fd with EXCLOSE */
    161 	bool		fd_foclose;	/* non-zero if >0 fd with FOCLOSE */
    162 	/*
    163 	 * This structure is used when the number of open files is
    164 	 * <= NDFILE, and is then pointed to by fd_dt above.
    165 	 */
    166 	fdtab_t		fd_dtbuiltin;
    167 	/*
    168 	 * These arrays are used when the number of open files is
    169 	 * <= 1024 (NDENTRIES * NDENTRIES), and are then pointed to by
        	 * the fd_himap and fd_lomap pointers above.
    170 	 */
    171 #define fd_startzero	fd_dhimap	/* area to zero on return to cache */
    172 	uint32_t	fd_dhimap[NDENTRIES >> NDENTRYSHIFT];
    173 	uint32_t	fd_dlomap[NDENTRIES];
    174 } filedesc_t;
    175 
    176 /*
    177  * Working directory, root and umask information.  Serialization:
    178  *
    179  * a	atomic operations
    180  * l	cwdi_lock
    181  */
    182 typedef struct cwdinfo {
    183 	struct vnode	*cwdi_cdir;	/* l: current directory */
    184 	struct vnode	*cwdi_rdir;	/* l: root directory */
    185 	struct vnode	*cwdi_edir;	/* l: emulation root (if known) */
    186 	u_int		cwdi_cmask;	/* a: mask for file creation */
    187 	u_int		cwdi_refcnt;	/* a: reference count */
    188 
        	/*
        	 * The __aligned() attribute below belongs to cwdi_lock; the
        	 * field comment sits between the declarator and the attribute.
        	 * NOTE(review): the ":" key is not in this struct's legend; the
        	 * fdfile legend earlier in this header uses ":" for "unlocked".
        	 */
    189 	krwlock_t	cwdi_lock	/* :: lock on struct */
    190 	    __aligned(COHERENCY_UNIT);	/* -> gets own cache line */
    191 } cwdinfo_t;
    192 
    193 #ifdef _KERNEL
    194 
    195 struct fileops;		/* forward decl: fd_clone() takes a pointer */
    196 struct socket;		/* forward decl: fd_getsock*() take pointers */
    197 struct proc;		/* forward decl: used as struct proc * below */
    198 
    199 extern struct cwdinfo cwdi0;	/* declaration only; defined elsewhere */
    200 
    201 /*
    202  * Kernel global variables and routines.
         *
         * Parameter names are omitted from these prototypes.  file_t, proc_t
         * and lwp_t are not declared in this header; they must come from
         * another header (possibly via the includes at the top of this file).
    203  */
    204 void	fd_sys_init(void);
    205 int	fd_open(const char*, int, int, int*);
    206 int	fd_dupopen(int, bool, int, int *);
    207 int	fd_alloc(struct proc *, int, int *);
    208 void	fd_tryexpand(struct proc *);
    209 int	fd_allocfile(file_t **, int *);
    210 void	fd_affix(struct proc *, file_t *, unsigned);
    211 void	fd_abort(struct proc *, file_t *, unsigned);
    212 filedesc_t *fd_copy(void);
    213 filedesc_t *fd_init(filedesc_t *);
    214 void	fd_share(proc_t *);
    215 void	fd_hold(lwp_t *);
    216 void	fd_free(void);
    217 void	fd_closeexec(void);
    218 void	fd_ktrexecfd(void);
    219 int	fd_checkstd(void);
        /*
         * NOTE(review): the fd_get*()/fd_put*() names suggest paired
         * acquire/release of a descriptor reference -- confirm against the
         * implementation before relying on that.
         */
    220 file_t	*fd_getfile(unsigned);
    221 file_t	*fd_getfile2(proc_t *, unsigned);
    222 void	fd_putfile(unsigned);
    223 int	fd_getvnode(unsigned, file_t **);
    224 int	fd_getsock(unsigned, struct socket **);
    225 int	fd_getsock1(unsigned, struct socket **, file_t **);
    226 void	fd_putvnode(unsigned);
    227 void	fd_putsock(unsigned);
    228 int	fd_close(unsigned);
    229 int	fd_dup(file_t *, int, int *, bool, bool);
    230 int	fd_dup2(file_t *, unsigned, int);
    231 int	fd_clone(file_t *, unsigned, int, const struct fileops *, void *);
        /*
         * Setters for fdfile_t::ff_exclose / ff_foclose.  The note preceding
         * struct fdfile says to use these rather than writing the fields
         * directly, so that filedesc::fd_exclose is updated as needed.
         */
    232 void	fd_set_exclose(struct lwp *, int, bool);
    233 void	fd_set_foclose(struct lwp *, int, bool);
    234 int	pipe1(struct lwp *, int *, int);
    235 int	dodup(struct lwp *, int, int, int, register_t *);
    236 
    237 void	cwd_sys_init(void);	/* cwd* routines: prototypes only, bodies elsewhere */
    238 struct cwdinfo *cwdinit(void);
    239 void	cwdshare(proc_t *);
    240 void	cwdunshare(proc_t *);
    241 void	cwdfree(struct cwdinfo *);
    242 void	cwdexec(struct proc *);
    243 
    244 #define GETCWD_CHECK_ACCESS 0x0001
        /*
         * NOTE(review): GETCWD_CHECK_ACCESS is presumably a flag bit for one
         * of getcwd_common()'s int arguments -- confirm against the
         * implementation; the prototypes here do not name their parameters.
         */
    245 int	getcwd_common(struct vnode *, struct vnode *, char **, char *, int,
    246     int, struct lwp *);
    247 int	vnode_to_path(char *, size_t, struct vnode *, struct lwp *,
    248     struct proc *);
    249 
    250 int	closef(file_t *);	/* file_t is not declared in this header */
    251 file_t *fgetdummy(void);
    252 void	fputdummy(file_t *);
    253 
    254 struct stat;		/* forward decl: only a pointer is passed below */
    255 int	do_sys_fstat(int, struct stat *);
    256 struct flock;		/* forward decl: only a pointer is passed below */
    257 int	do_fcntl_lock(int, int, struct flock *);
    258 int	do_posix_fadvise(int, off_t, off_t, int);
    259 
    260 extern kmutex_t filelist_lock;	/* declaration only; defined elsewhere */
    261 extern filedesc_t filedesc0;	/* declaration only; defined elsewhere */
    262 
    263 #endif /* _KERNEL */
    264 
    265 #endif /* !_SYS_FILEDESC_H_ */
    266