kern_physio.c revision 1.57 1 /* $NetBSD: kern_physio.c,v 1.57 2003/08/07 16:31:47 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1982, 1986, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)kern_physio.c 8.1 (Berkeley) 6/10/93
37 */
38
39 /*-
40 * Copyright (c) 1994 Christopher G. Demetriou
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 * must display the following acknowledgement:
52 * This product includes software developed by the University of
53 * California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)kern_physio.c 8.1 (Berkeley) 6/10/93
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.57 2003/08/07 16:31:47 agc Exp $");
75
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/buf.h>
79 #include <sys/malloc.h>
80 #include <sys/proc.h>
81
82 #include <uvm/uvm_extern.h>
83
/*
 * The routines implemented in this file are described in:
 *	Leffler, et al.: The Design and Implementation of the 4.3BSD
 *	    UNIX Operating System (Addison-Wesley, 1989)
 * on pages 231-233.
 *
 * The routines "getphysbuf" and "putphysbuf" steal and return a swap
 * buffer.  Leffler, et al., says that swap buffers are used to do the
 * I/O, so raw I/O requests don't have to be single-threaded.
 */
94
95 struct buf *getphysbuf __P((void));
96 void putphysbuf __P((struct buf *bp));
97
98 /*
99 * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly
100 * from the raw device to user buffers, and bypasses the buffer cache.
101 *
102 * Comments in brackets are from Leffler, et al.'s pseudo-code implementation.
103 */
104 int
105 physio(strategy, bp, dev, flags, minphys, uio)
106 void (*strategy) __P((struct buf *));
107 struct buf *bp;
108 dev_t dev;
109 int flags;
110 void (*minphys) __P((struct buf *));
111 struct uio *uio;
112 {
113 struct iovec *iovp;
114 struct lwp *l = curlwp;
115 struct proc *p = l->l_proc;
116 int error, done, i, nobuf, s;
117 long todo;
118
119 error = 0;
120 flags &= B_READ | B_WRITE;
121
122 /* Make sure we have a buffer, creating one if necessary. */
123 if ((nobuf = (bp == NULL)) != 0) {
124
125 bp = getphysbuf();
126 /* bp was just malloc'd so can't already be busy */
127 bp->b_flags |= B_BUSY;
128
129 } else {
130
131 /* [raise the processor priority level to splbio;] */
132 s = splbio();
133
134 /* [while the buffer is marked busy] */
135 while (bp->b_flags & B_BUSY) {
136 /* [mark the buffer wanted] */
137 bp->b_flags |= B_WANTED;
138 /* [wait until the buffer is available] */
139 tsleep((caddr_t)bp, PRIBIO+1, "physbuf", 0);
140 }
141
142 /* Mark it busy, so nobody else will use it. */
143 bp->b_flags |= B_BUSY;
144
145 /* [lower the priority level] */
146 splx(s);
147 }
148
149 /* [set up the fixed part of the buffer for a transfer] */
150 bp->b_dev = dev;
151 bp->b_error = 0;
152 bp->b_proc = p;
153 LIST_INIT(&bp->b_dep);
154
155 /*
156 * [while there are data to transfer and no I/O error]
157 * Note that I/O errors are handled with a 'goto' at the bottom
158 * of the 'while' loop.
159 */
160 for (i = 0; i < uio->uio_iovcnt; i++) {
161 iovp = &uio->uio_iov[i];
162 while (iovp->iov_len > 0) {
163
164 /*
165 * [mark the buffer busy for physical I/O]
166 * (i.e. set B_PHYS (because it's an I/O to user
167 * memory, and B_RAW, because B_RAW is to be
168 * "Set by physio for raw transfers.", in addition
169 * to the "busy" and read/write flag.)
170 */
171 bp->b_flags = B_BUSY | B_PHYS | B_RAW | flags;
172
173 /* [set up the buffer for a maximum-sized transfer] */
174 bp->b_blkno = btodb(uio->uio_offset);
175 bp->b_bcount = iovp->iov_len;
176 bp->b_data = iovp->iov_base;
177
178 /*
179 * [call minphys to bound the transfer size]
180 * and remember the amount of data to transfer,
181 * for later comparison.
182 */
183 (*minphys)(bp);
184 todo = bp->b_bcount;
185 #ifdef DIAGNOSTIC
186 if (todo <= 0)
187 panic("todo(%ld) <= 0; minphys broken", todo);
188 if (todo > MAXPHYS)
189 panic("todo(%ld) > MAXPHYS; minphys broken",
190 todo);
191 #endif
192
193 /*
194 * [lock the part of the user address space involved
195 * in the transfer]
196 * Beware vmapbuf(); it clobbers b_data and
197 * saves it in b_saveaddr. However, vunmapbuf()
198 * restores it.
199 */
200 PHOLD(l);
201 error = uvm_vslock(p, bp->b_data, todo,
202 (flags & B_READ) ?
203 VM_PROT_WRITE : VM_PROT_READ);
204 if (error) {
205 bp->b_flags |= B_ERROR;
206 bp->b_error = error;
207 goto after_vsunlock;
208 }
209 vmapbuf(bp, todo);
210
211 /* [call strategy to start the transfer] */
212 (*strategy)(bp);
213
214 /*
215 * Note that the raise/wait/lower/get error
216 * steps below would be done by biowait(), but
217 * we want to unlock the address space before
218 * we lower the priority.
219 *
220 * [raise the priority level to splbio]
221 */
222 s = splbio();
223
224 /* [wait for the transfer to complete] */
225 while ((bp->b_flags & B_DONE) == 0)
226 tsleep((caddr_t) bp, PRIBIO + 1, "physio", 0);
227
228 /* Mark it busy again, so nobody else will use it. */
229 bp->b_flags |= B_BUSY;
230
231 /* [lower the priority level] */
232 splx(s);
233
234 /*
235 * [unlock the part of the address space previously
236 * locked]
237 */
238 vunmapbuf(bp, todo);
239 uvm_vsunlock(p, bp->b_data, todo);
240 after_vsunlock:
241 PRELE(l);
242
243 /* remember error value (save a splbio/splx pair) */
244 if (bp->b_flags & B_ERROR)
245 error = (bp->b_error ? bp->b_error : EIO);
246
247 /*
248 * [deduct the transfer size from the total number
249 * of data to transfer]
250 */
251 done = bp->b_bcount - bp->b_resid;
252 KASSERT(done >= 0);
253 KASSERT(done <= todo);
254
255 iovp->iov_len -= done;
256 iovp->iov_base = (caddr_t)iovp->iov_base + done;
257 uio->uio_offset += done;
258 uio->uio_resid -= done;
259
260 /*
261 * Now, check for an error.
262 * Also, handle weird end-of-disk semantics.
263 */
264 if (error || done < todo)
265 goto done;
266 }
267 }
268
269 done:
270 /*
271 * [clean up the state of the buffer]
272 * Remember if somebody wants it, so we can wake them up below.
273 * Also, if we had to steal it, give it back.
274 */
275 s = splbio();
276 bp->b_flags &= ~(B_BUSY | B_PHYS | B_RAW);
277 if (nobuf)
278 putphysbuf(bp);
279 else {
280 /*
281 * [if another process is waiting for the raw I/O buffer,
282 * wake up processes waiting to do physical I/O;
283 */
284 if (bp->b_flags & B_WANTED) {
285 bp->b_flags &= ~B_WANTED;
286 wakeup(bp);
287 }
288 }
289 splx(s);
290
291 return (error);
292 }
293
294 /*
295 * allocate a buffer structure for use in physical I/O.
296 */
297 struct buf *
298 getphysbuf()
299 {
300 struct buf *bp;
301 int s;
302
303 s = splbio();
304 bp = pool_get(&bufpool, PR_WAITOK);
305 splx(s);
306 memset(bp, 0, sizeof(*bp));
307 BUF_INIT(bp);
308 return(bp);
309 }
310
311 /*
312 * get rid of a swap buffer structure which has been used in physical I/O.
313 */
314 void
315 putphysbuf(bp)
316 struct buf *bp;
317 {
318 int s;
319
320 if (__predict_false(bp->b_flags & B_WANTED))
321 panic("putphysbuf: private buf B_WANTED");
322 s = splbio();
323 pool_put(&bufpool, bp);
324 splx(s);
325 }
326
327 /*
328 * Leffler, et al., says on p. 231:
329 * "The minphys() routine is called by physio() to adjust the
330 * size of each I/O transfer before the latter is passed to
331 * the strategy routine..."
332 *
333 * so, just adjust the buffer's count accounting to MAXPHYS here,
334 * and return the new count;
335 */
336 void
337 minphys(bp)
338 struct buf *bp;
339 {
340
341 if (bp->b_bcount > MAXPHYS)
342 bp->b_bcount = MAXPHYS;
343 }
344