ld_ataraid.c revision 1.11 1 /* $NetBSD: ld_ataraid.c,v 1.11 2004/04/22 00:17:10 itojun Exp $ */
2
3 /*
4 * Copyright (c) 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Support for ATA RAID logical disks.
40 *
41 * Note that all the RAID happens in software here; the ATA RAID
42 * controllers we're dealing with (Promise, etc.) only support
43 * configuration data on the component disks, with the BIOS supporting
44 * booting from the RAID volumes.
45 */
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.11 2004/04/22 00:17:10 itojun Exp $");
49
50 #include "rnd.h"
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/conf.h>
55 #include <sys/kernel.h>
56 #include <sys/device.h>
57 #include <sys/buf.h>
58 #include <sys/dkio.h>
59 #include <sys/disk.h>
60 #include <sys/disklabel.h>
61 #include <sys/fcntl.h>
62 #include <sys/malloc.h>
63 #include <sys/vnode.h>
64 #if NRND > 0
65 #include <sys/rnd.h>
66 #endif
67
68 #include <miscfs/specfs/specdev.h>
69
70 #include <dev/ldvar.h>
71
72 #include <dev/ata/ata_raidvar.h>
73
74 struct ld_ataraid_softc {
75 struct ld_softc sc_ld;
76
77 struct ataraid_array_info *sc_aai;
78 struct vnode *sc_vnodes[ATA_RAID_MAX_DISKS];
79
80 void (*sc_iodone)(struct buf *);
81 };
82
83 static int ld_ataraid_match(struct device *, struct cfdata *, void *);
84 static void ld_ataraid_attach(struct device *, struct device *, void *);
85
86 static int ld_ataraid_dump(struct ld_softc *, void *, int, int);
87
88 static int ld_ataraid_start_span(struct ld_softc *, struct buf *);
89
90 static int ld_ataraid_start_raid0(struct ld_softc *, struct buf *);
91 static void ld_ataraid_iodone_raid0(struct buf *);
92
93 CFATTACH_DECL(ld_ataraid, sizeof(struct ld_ataraid_softc),
94 ld_ataraid_match, ld_ataraid_attach, NULL, NULL);
95
96 static int ld_ataraid_initialized;
97 static struct pool ld_ataraid_cbufpl;
98
99 struct cbuf {
100 struct buf cb_buf; /* new I/O buf */
101 struct buf *cb_obp; /* ptr. to original I/O buf */
102 struct ld_ataraid_softc *cb_sc; /* pointer to ld softc */
103 u_int cb_comp; /* target component */
104 SIMPLEQ_ENTRY(cbuf) cb_q; /* fifo of component buffers */
105 };
106
/*
 * Component-buffer allocation helpers.
 *
 * CBUF_GET() does not sleep (PR_NOWAIT) and may therefore yield NULL.
 * Neither macro ends in a semicolon, so both behave like ordinary
 * expressions and can be used inside conditions as well as statements.
 */
#define	CBUF_GET()	pool_get(&ld_ataraid_cbufpl, PR_NOWAIT)
#define	CBUF_PUT(cbp)	pool_put(&ld_ataraid_cbufpl, (cbp))
109
/*
 * Autoconfiguration match routine.
 *
 * Unconditionally reports a match; none of the arguments is examined.
 */
static int
ld_ataraid_match(struct device *parent, struct cfdata *match, void *aux)
{
	return 1;
}
116
117 static void
118 ld_ataraid_attach(struct device *parent, struct device *self, void *aux)
119 {
120 struct ld_ataraid_softc *sc = (void *) self;
121 struct ld_softc *ld = &sc->sc_ld;
122 struct ataraid_array_info *aai = aux;
123 const char *level;
124 struct vnode *vp;
125 char unklev[32];
126 u_int i;
127
128 if (ld_ataraid_initialized == 0) {
129 ld_ataraid_initialized = 1;
130 pool_init(&ld_ataraid_cbufpl, sizeof(struct cbuf), 0,
131 0, 0, "ldcbuf", NULL);
132 }
133
134 sc->sc_aai = aai; /* this data persists */
135
136 ld->sc_maxxfer = MAXPHYS * aai->aai_width; /* XXX */
137 ld->sc_secperunit = aai->aai_capacity;
138 ld->sc_secsize = 512; /* XXX */
139 ld->sc_maxqueuecnt = 128; /* XXX */
140 ld->sc_dump = ld_ataraid_dump;
141
142 switch (aai->aai_level) {
143 case AAI_L_SPAN:
144 level = "SPAN";
145 ld->sc_start = ld_ataraid_start_span;
146 sc->sc_iodone = ld_ataraid_iodone_raid0;
147 break;
148
149 case AAI_L_RAID0:
150 level = "RAID-0";
151 ld->sc_start = ld_ataraid_start_raid0;
152 sc->sc_iodone = ld_ataraid_iodone_raid0;
153 break;
154
155 case AAI_L_RAID1:
156 level = "RAID-1";
157 break;
158
159 case AAI_L_RAID0 | AAI_L_RAID1:
160 level = "RAID-10";
161 break;
162
163 default:
164 snprintf(unklev, sizeof(unklev), "<unknown level 0x%x>",
165 aai->aai_level);
166 level = unklev;
167 }
168
169 aprint_naive(": ATA %s array\n", level);
170 aprint_normal(": %s ATA %s array\n",
171 ata_raid_type_name(aai->aai_type), level);
172
173 if (ld->sc_start == NULL) {
174 aprint_error("%s: unsupported array type\n",
175 ld->sc_dv.dv_xname);
176 return;
177 }
178
179 /*
180 * We get a geometry from the device; use it.
181 */
182 ld->sc_nheads = aai->aai_heads;
183 ld->sc_nsectors = aai->aai_sectors;
184 ld->sc_ncylinders = aai->aai_cylinders;
185
186 /*
187 * Configure all the component disks.
188 */
189 for (i = 0; i < aai->aai_ndisks; i++) {
190 struct ataraid_disk_info *adi = &aai->aai_disks[i];
191 int bmajor, error;
192 dev_t dev;
193
194 bmajor = devsw_name2blk(adi->adi_dev->dv_xname, NULL, 0);
195 dev = MAKEDISKDEV(bmajor, adi->adi_dev->dv_unit, RAW_PART);
196 error = bdevvp(dev, &vp);
197 if (error)
198 break;
199 error = VOP_OPEN(vp, FREAD|FWRITE, NOCRED, 0);
200 if (error) {
201 vput(vp);
202 /*
203 * XXX This is bogus. We should just mark the
204 * XXX component as FAILED, and write-back new
205 * XXX config blocks.
206 */
207 break;
208 }
209
210 VOP_UNLOCK(vp, 0);
211 sc->sc_vnodes[i] = vp;
212 }
213 if (i == aai->aai_ndisks) {
214 ld->sc_flags = LDF_ENABLED;
215 goto finish;
216 }
217
218 for (i = 0; i < aai->aai_ndisks; i++) {
219 vp = sc->sc_vnodes[i];
220 sc->sc_vnodes[i] = NULL;
221 if (vp != NULL)
222 (void) vn_close(vp, FREAD|FWRITE, NOCRED, curproc);
223 }
224
225 finish:
226 ldattach(ld);
227 }
228
229 static struct cbuf *
230 ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp,
231 u_int comp, daddr_t bn, caddr_t addr, long bcount)
232 {
233 struct cbuf *cbp;
234
235 cbp = CBUF_GET();
236 if (cbp == NULL)
237 return (NULL);
238 BUF_INIT(&cbp->cb_buf);
239 cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
240 cbp->cb_buf.b_iodone = sc->sc_iodone;
241 cbp->cb_buf.b_proc = bp->b_proc;
242 cbp->cb_buf.b_vp = sc->sc_vnodes[comp];
243 cbp->cb_buf.b_blkno = bn + sc->sc_aai->aai_offset;
244 cbp->cb_buf.b_data = addr;
245 cbp->cb_buf.b_bcount = bcount;
246
247 /* Context for iodone */
248 cbp->cb_obp = bp;
249 cbp->cb_sc = sc;
250 cbp->cb_comp = comp;
251
252 return (cbp);
253 }
254
255 static int
256 ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp)
257 {
258 struct ld_ataraid_softc *sc = (void *) ld;
259 struct ataraid_array_info *aai = sc->sc_aai;
260 struct ataraid_disk_info *adi;
261 SIMPLEQ_HEAD(, cbuf) cbufq;
262 struct cbuf *cbp;
263 caddr_t addr;
264 daddr_t bn;
265 long bcount, rcount;
266 u_int comp;
267
268 /* Allocate component buffers. */
269 SIMPLEQ_INIT(&cbufq);
270 addr = bp->b_data;
271
272 /* Find the first component. */
273 comp = 0;
274 adi = &aai->aai_disks[comp];
275 bn = bp->b_rawblkno;
276 while (bn >= adi->adi_compsize) {
277 bn -= adi->adi_compsize;
278 adi = &aai->aai_disks[++comp];
279 }
280
281 bp->b_resid = bp->b_bcount;
282
283 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
284 rcount = bp->b_bcount;
285 if ((adi->adi_compsize - bn) < btodb(rcount))
286 rcount = dbtob(adi->adi_compsize - bn);
287
288 cbp = ld_ataraid_make_cbuf(sc, bp, comp, bn, addr, rcount);
289 if (cbp == NULL) {
290 /* Free the already allocated component buffers. */
291 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
292 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
293 CBUF_PUT(cbp);
294 }
295 return (EAGAIN);
296 }
297
298 /*
299 * For a span, we always know we advance to the next disk,
300 * and always start at offset 0 on that disk.
301 */
302 adi = &aai->aai_disks[++comp];
303 bn = 0;
304
305 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q);
306 addr += rcount;
307 }
308
309 /* Now fire off the requests. */
310 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
311 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
312 if ((cbp->cb_buf.b_flags & B_READ) == 0)
313 cbp->cb_buf.b_vp->v_numoutput++;
314 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf);
315 }
316
317 return (0);
318 }
319
320 static int
321 ld_ataraid_start_raid0(struct ld_softc *ld, struct buf *bp)
322 {
323 struct ld_ataraid_softc *sc = (void *) ld;
324 struct ataraid_array_info *aai = sc->sc_aai;
325 SIMPLEQ_HEAD(, cbuf) cbufq;
326 struct cbuf *cbp;
327 caddr_t addr;
328 daddr_t bn, cbn, tbn, off;
329 long bcount, rcount;
330 u_int comp;
331
332 /* Allocate component buffers. */
333 SIMPLEQ_INIT(&cbufq);
334 addr = bp->b_data;
335 bn = bp->b_rawblkno;
336
337 bp->b_resid = bp->b_bcount;
338
339 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
340 tbn = bn / aai->aai_interleave;
341 off = bn % aai->aai_interleave;
342
343 if (__predict_false(tbn == aai->aai_capacity /
344 aai->aai_interleave)) {
345 /* Last stripe. */
346 daddr_t sz = (aai->aai_capacity -
347 (tbn * aai->aai_interleave)) /
348 aai->aai_width;
349 comp = off / sz;
350 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) +
351 (off % sz);
352 rcount = min(bcount, dbtob(sz));
353 } else {
354 comp = tbn % aai->aai_width;
355 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) +
356 off;
357 rcount = min(bcount, dbtob(aai->aai_interleave - off));
358 }
359
360 cbp = ld_ataraid_make_cbuf(sc, bp, comp, cbn, addr, rcount);
361 if (cbp == NULL) {
362 /* Free the already allocated component buffers. */
363 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
364 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
365 CBUF_PUT(cbp);
366 }
367 return (EAGAIN);
368 }
369 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q);
370 bn += btodb(rcount);
371 addr += rcount;
372 }
373
374 /* Now fire off the requests. */
375 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) {
376 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q);
377 if ((cbp->cb_buf.b_flags & B_READ) == 0)
378 cbp->cb_buf.b_vp->v_numoutput++;
379 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf);
380 }
381
382 return (0);
383 }
384
385 /*
386 * Called at interrupt time. Mark the component as done and if all
387 * components are done, take an "interrupt".
388 */
389 static void
390 ld_ataraid_iodone_raid0(struct buf *vbp)
391 {
392 struct cbuf *cbp = (struct cbuf *) vbp;
393 struct buf *bp = cbp->cb_obp;
394 struct ld_ataraid_softc *sc = cbp->cb_sc;
395 long count;
396 int s;
397
398 s = splbio();
399
400 if (cbp->cb_buf.b_flags & B_ERROR) {
401 bp->b_flags |= B_ERROR;
402 bp->b_error = cbp->cb_buf.b_error ?
403 cbp->cb_buf.b_error : EIO;
404
405 /* XXX Update component config blocks. */
406
407 printf("%s: error %d on component %d\n",
408 sc->sc_ld.sc_dv.dv_xname, bp->b_error, cbp->cb_comp);
409 }
410 count = cbp->cb_buf.b_bcount;
411 CBUF_PUT(cbp);
412
413 /* If all done, "interrupt". */
414 bp->b_resid -= count;
415 if (bp->b_resid < 0)
416 panic("ld_ataraid_iodone_raid0: count");
417 if (bp->b_resid == 0)
418 lddone(&sc->sc_ld, bp);
419 splx(s);
420 }
421
/*
 * Dump routine installed as sc_dump.
 *
 * Dumping is not implemented: every call fails with EIO and none of
 * the arguments is examined.
 */
static int
ld_ataraid_dump(struct ld_softc *sc, void *data, int blkno, int blkcnt)
{
	return EIO;
}
428