Home | History | Annotate | Line # | Download | only in sys
      1 /*     $NetBSD: buf.h,v 1.135 2024/05/12 10:34:56 rillig Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, and by Andrew Doran.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Copyright (c) 1982, 1986, 1989, 1993
     35  *	The Regents of the University of California.  All rights reserved.
     36  * (c) UNIX System Laboratories, Inc.
     37  * All or some portions of this file are derived from material licensed
     38  * to the University of California by American Telephone and Telegraph
     39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     40  * the permission of UNIX System Laboratories, Inc.
     41  *
     42  * Redistribution and use in source and binary forms, with or without
     43  * modification, are permitted provided that the following conditions
     44  * are met:
     45  * 1. Redistributions of source code must retain the above copyright
     46  *    notice, this list of conditions and the following disclaimer.
     47  * 2. Redistributions in binary form must reproduce the above copyright
     48  *    notice, this list of conditions and the following disclaimer in the
     49  *    documentation and/or other materials provided with the distribution.
     50  * 3. Neither the name of the University nor the names of its contributors
     51  *    may be used to endorse or promote products derived from this software
     52  *    without specific prior written permission.
     53  *
     54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     64  * SUCH DAMAGE.
     65  *
     66  *	@(#)buf.h	8.9 (Berkeley) 3/30/95
     67  */
     68 
     69 #ifndef _SYS_BUF_H_
     70 #define	_SYS_BUF_H_
     71 
     72 #include <sys/pool.h>
     73 #include <sys/queue.h>
     74 #include <sys/mutex.h>
     75 #include <sys/condvar.h>
     76 #include <sys/rbtree.h>
     77 #if defined(_KERNEL)
     78 #include <sys/workqueue.h>
     79 #endif /* defined(_KERNEL) */
     80 
     81 struct buf;
     82 struct mount;
     83 struct vnode;
     84 struct kauth_cred;
     85 
     86 #define NOLIST ((struct buf *)0x87654321)
     87 
     88 extern kmutex_t bufcache_lock;
     89 extern kmutex_t buffer_lock;
     90 
     91 #if defined(_KERNEL)
     92 extern void (*biodone_vfs)(buf_t *);
     93 #endif
     94 
     95 /*
     96  * The buffer header describes an I/O operation in the kernel.
     97  *
     98  * Field markings and the corresponding locks:
     99  *
    100  * b	thread of execution that holds BC_BUSY, does not correspond
    101  *	  directly to any particular LWP
    102  * c	bufcache_lock
    103  * o	b_objlock
    104  *
    105  * For buffers associated with a vnode, b_objlock points to vp->v_interlock.
    106  * If not associated with a vnode, it points to the generic buffer_lock.
    107  */
    108 
    109 /* required for the conditional union member below to be ~safe */
    110 #if defined(_KERNEL)
    111 __CTASSERT(sizeof(struct work) <= sizeof(TAILQ_ENTRY(buf)));
    112 #endif
    113 
    114 struct buf {
    115 	union {
    116 		TAILQ_ENTRY(buf) u_actq;
    117 		rb_node_t u_rbnode;
    118 #if defined(_KERNEL)
    119 		/* u_work is smaller than u_actq */
    120 		struct work u_work;
    121 #endif
    122 	} b_u;					/* b: device driver queue */
    123 #define	b_actq	b_u.u_actq
    124 #define	b_work	b_u.u_work
    125 	void			(*b_iodone)(struct buf *);/* b: call when done */
    126 	int			b_error;	/* b: errno value. */
    127 	int			b_resid;	/* b: remaining I/O. */
    128 	u_int			b_flags;	/* b: B_* flags */
    129 	int			b_prio;		/* b: priority for queue */
    130 	int			b_bufsize;	/* b: allocated size */
    131 	int			b_bcount;	/* b: valid bytes in buffer */
    132 	dev_t			b_dev;		/* b: associated device */
    133 	void			*b_data;	/* b: fs private data */
    134 	daddr_t			b_blkno;	/* b: physical block number
    135 						      (partition relative) */
    136 	daddr_t			b_rawblkno;	/* b: raw physical block number
    137 						      (volume relative) */
    138 	struct proc		*b_proc;	/* b: proc if BB_PHYS */
    139 	void			*b_saveaddr;	/* b: saved b_data for physio */
    140 	struct cpu_info		*b_ci;		/* b: originating CPU */
    141 
    142 	/*
    143 	 * b: private data for owner.
    144 	 *  - buffer cache buffers are owned by corresponding filesystem.
    145 	 *  - non-buffer cache buffers are owned by subsystem which
    146 	 *    allocated them. (filesystem, disk driver, etc)
    147 	 */
    148 	void	*b_private;
    149 	off_t	b_dcookie;		/* NFS: Offset cookie if dir block */
    150 
    151 	kcondvar_t		b_busy;		/* c: threads waiting on buf */
    152 	void			*b_unused;	/*  : unused */
    153 	LIST_ENTRY(buf)		b_hash;		/* c: hash chain */
    154 	LIST_ENTRY(buf)		b_vnbufs;	/* c: associated vnode */
    155 	TAILQ_ENTRY(buf)	b_freelist;	/* c: position if not active */
    156 	TAILQ_ENTRY(buf)	b_wapbllist;	/* c: transaction buffer list */
    157 	daddr_t			b_lblkno;	/* c: logical block number */
    158 	int			b_freelistindex;/* c: free list index (BQ_) */
    159 	u_int			b_cflags;	/* c: BC_* flags */
    160 	struct vnode		*b_vp;		/* c: file vnode */
    161 
    162 	kcondvar_t		b_done;		/* o: waiting on completion */
    163 	u_int			b_oflags;	/* o: BO_* flags */
    164 	kmutex_t		*b_objlock;	/* o: completion lock */
    165 };
    166 
/*
 * Historic industry practice keeps the cylinder number used by disksort()
 * in the `b_resid' field; b_cylinder is an alias for that field, kept for
 * portability.
 */
#define	b_cylinder	b_resid
    172 
/*
 * b_cflags bits.  These belong to the buffer cache.
 */
#define	BC_AGE		0x00000001	/* move to age queue when I/O done */
#define	BC_BUSY		0x00000010	/* I/O in progress */
#define	BC_INVAL	0x00002000	/* does not contain valid info */
#define	BC_NOCACHE	0x00008000	/* do not cache block after use */
#define	BC_WANTED	0x00800000	/* process wants this buffer */
#define	BC_VFLUSH	0x04000000	/* buffer is being synced */
    182 
/*
 * b_oflags bits.  These belong to the object the buffer is associated with.
 */
#define	BO_DELWRI	0x00000080	/* delay I/O until buffer reused */
#define	BO_DONE		0x00000200	/* I/O completed */
    188 
/*
 * b_flags bits.  These belong to whoever holds the buffer.
 * B_WRITE is zero: a write is denoted by the absence of B_READ.
 */
#define	B_WRITE		0x00000000	/* write buffer (pseudo flag) */
#define	B_ASYNC		0x00000004	/* start I/O, do not wait */
#define	B_COWDONE	0x00000400	/* copy-on-write already done */
#define	B_GATHERED	0x00001000	/* LFS: already in a segment */
#define	B_LOCKED	0x00004000	/* locked in core (not reusable) */
#define	B_PHYS		0x00040000	/* I/O to user memory */
#define	B_RAW		0x00080000	/* set by physio for raw transfers */
#define	B_READ		0x00100000	/* read buffer */
#define	B_DEVPRIVATE	0x02000000	/* device driver private flag */
#define	B_MEDIA_FUA	0x08000000	/* set Force Unit Access for media */
#define	B_MEDIA_DPO	0x10000000	/* set Disable Page Out for media */
    203 
/*
 * Bit-name string for decoding the B_*, BC_* and BO_* flag words, in the
 * old-style snprintb(3) format: the leading "\20" selects hexadecimal
 * output, and each following entry is a 1-origin bit number (written as an
 * octal escape) followed by the name of that bit.
 *
 * "\30WANTED" names BC_WANTED (0x00800000, bit 24); without it that flag
 * would never be printed by name.
 * NOTE(review): "\4BAD" (0x00000008) has no matching flag definition in
 * this header; it is kept so existing output does not change.
 */
#define BUF_FLAGBITS \
    "\20\1AGE\3ASYNC\4BAD\5BUSY\10DELWRI" \
    "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
    "\23PHYS\24RAW\25READ\30WANTED\32DEVPRIVATE\33VFLUSH\34MEDIA_FUA" \
    "\35MEDIA_DPO"
    208 
/*
 * Direction predicates.  B_WRITE is a zero-valued "pseudo flag", so it
 * cannot be tested with `&'; the direction is decided by B_READ alone.
 * Both macros evaluate to 1 or 0.
 */
#define BUF_ISREAD(bp)	((B_READ & (bp)->b_flags) != 0)
#define BUF_ISWRITE(bp)	(!BUF_ISREAD(bp))
    212 
/* The media flags; these are the ones to pass on for nested I/O. */
#define B_MEDIA_FLAGS	(B_MEDIA_DPO | B_MEDIA_FUA)
    215 
/*
 * Description of a clustered I/O.  It is stored in the b_saveaddr field of
 * the buffer on which the I/O is done.  At I/O completion the cluster
 * callback uses it to parcel the I/O out to the individual buffers, and
 * then frees the structure.
 */
struct cluster_save {
	long		bs_bcount;	/* saved b_bcount */
	long		bs_bufsize;	/* saved b_bufsize */
	void		*bs_saveaddr;	/* saved buffer address */
	int		bs_nchildren;	/* number of associated buffers */
	struct buf	*bs_children;	/* list of associated buffers */
};
    229 
/*
 * clrbuf(bp): fill the first b_bcount bytes of the buffer's data area
 * (b_data) with zeros, then reset b_resid to 0.
 * The argument is evaluated more than once.
 */
#define	clrbuf(bp) do {							\
	memset((bp)->b_data, 0, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
} while (/* CONSTCOND */ 0)
    238 
/* Flag arguments for the low-level allocation routines. */
#define B_CLRBUF	0x01	/* clear the allocated buffer */
#define B_SYNC		0x02	/* do all allocations synchronously */
#define B_METAONLY	0x04	/* return indirect block buffer */
#define B_CONTIG	0x08	/* allocate file contiguously */
    244 
/* Flag argument for bread() and breadn(). */
#define B_MODIFY	0x01	/* hint: the caller might modify the buffer */
    247 
    248 #ifdef _KERNEL
    249 
/*
 * Accessors for the b_prio field of a buffer.
 *
 * BIO_GETPRIO(bp)		yields bp->b_prio.
 * BIO_SETPRIO(bp, prio)	stores prio in bp->b_prio; the value of the
 *				expression is the assigned priority.
 * BIO_COPYPRIO(bp1, bp2)	copies the priority of bp2 into bp1.
 *
 * The BIO_SETPRIO expansion is fully parenthesized so that it remains a
 * single assignment when used inside a larger expression.
 */
#define	BIO_GETPRIO(bp)		((bp)->b_prio)
#define	BIO_SETPRIO(bp, prio)	((bp)->b_prio = (prio))
#define	BIO_COPYPRIO(bp1, bp2)	BIO_SETPRIO(bp1, BIO_GETPRIO(bp2))
    253 
/* Buffer I/O priority values, lowest first, and how many there are. */
#define	BPRIO_TIMENONCRITICAL	0
#define	BPRIO_TIMELIMITED	1
#define	BPRIO_TIMECRITICAL	2
#define	BPRIO_NPRIO		3
#define	BPRIO_DEFAULT		BPRIO_TIMELIMITED
    259 
    260 __BEGIN_DECLS
    261 /*
    262  * bufferio(9) ops
    263  */
    264 void	biodone(buf_t *);
    265 int	biowait(buf_t *);
    266 buf_t	*getiobuf(struct vnode *, bool);
    267 void	putiobuf(buf_t *);
    268 void	nestiobuf_setup(buf_t *, buf_t *, int, size_t);
    269 void	nestiobuf_done(buf_t *, int, int);
    270 
    271 void	nestiobuf_iodone(buf_t *);
    272 int	physio(void (*)(buf_t *), buf_t *, dev_t, int,
    273 	       void (*)(buf_t *), struct uio *);
    274 
    275 /*
    276  * buffercache(9) ops
    277  */
    278 int	bread(struct vnode *, daddr_t, int, int, buf_t **);
    279 int	breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
    280 	       int, buf_t **);
    281 int	bwrite(buf_t *);
    282 void	bawrite(buf_t *);
    283 void	bdwrite(buf_t *);
    284 buf_t	*getblk(struct vnode *, daddr_t, int, int, int);
    285 buf_t	*geteblk(int);
    286 buf_t	*incore(struct vnode *, daddr_t);
    287 int	allocbuf(buf_t *, int, int);
    288 void	brelsel(buf_t *, int);
    289 void	brelse(buf_t *, int);
    290 void	binvalbuf(struct vnode *, daddr_t);
    291 
    292 /*
    293  * So-far indeterminate ops that might belong to either
    294  * bufferio(9) or buffercache(9).
    295  */
    296 void	bremfree(buf_t *);
    297 void	bufinit(void);
    298 void	bufinit2(void);
    299 void	minphys(buf_t *);
    300 void	brelvp(buf_t *);
    301 void	reassignbuf(buf_t *, struct vnode *);
    302 void	bgetvp(struct vnode *, buf_t *);
    303 u_long	buf_memcalc(void);
    304 int	buf_drain(int);
    305 int	buf_setvalimit(vsize_t);
    306 #if defined(DDB) || defined(DEBUGPRINT)
    307 void	vfs_buf_print(buf_t *, int, void (*)(const char *, ...)
    308     __printflike(1, 2));
    309 #endif
    310 void	buf_init(buf_t *);
    311 void	buf_destroy(buf_t *);
    312 int	bbusy(buf_t *, bool, int, kmutex_t *);
    313 u_int	buf_nbuf(void);
    314 
    315 void	biohist_init(void);
    316 
    317 __END_DECLS
    318 #endif /* _KERNEL */
    319 #endif /* !_SYS_BUF_H_ */
    320