/*	$NetBSD: rf_netbsdkintf.c,v 1.34 1999/12/12 20:51:41 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
int rf_kdebug_level = 0;

/*
 * Leveled debug printfs: dbN_printf() produces output only when
 * rf_kdebug_level exceeds N.  In non-DEBUG kernels everything except
 * db0_printf() compiles away to nothing.
 */
#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */

/* protects recon_queue above */
decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX()   simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);

/* queue-debugging printfs, gated on rf_queueDebug */
#define Dprintf0(s) if (rf_queueDebug) \
	rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a) if (rf_queueDebug) \
	rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b) if (rf_queueDebug) \
	rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) if (rf_queueDebug) \
	rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

void raidattach __P((int));
int raidsize __P((dev_t));

void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* block/character device entry points */
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));

/* component-label I/O helpers */
int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
void rf_update_component_labels( RF_Raid_t *);
/*
 * Pilfered from ccd.c
 */

/* per-component I/O wrapper; allocated from each unit's sc_cbufpool */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* number of configured units */

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immedately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* raw-partition device corresponding to a given raid dev_t */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc * p, struct vnode **));

static void rf_markalldirty __P((RF_Raid_t *));
304
305 void
306 raidattach(num)
307 int num;
308 {
309 int raidID;
310 int i, rc;
311
312 #ifdef DEBUG
313 printf("raidattach: Asked for %d units\n", num);
314 #endif
315
316 if (num <= 0) {
317 #ifdef DIAGNOSTIC
318 panic("raidattach: count <= 0");
319 #endif
320 return;
321 }
322 /* This is where all the initialization stuff gets done. */
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!\n");
329 }
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337 recon_queue = NULL;
338
339 for (i = 0; i < numraid; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
347 /* put together some datastructures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357 numraid = num;
358 bzero(raid_softc, num * sizeof(struct raid_softc));
359 raid_softc->buf_queue.b_actf = NULL;
360 raid_softc->buf_queue.b_actb = &raid_softc->buf_queue.b_actf;
361
362 for (raidID = 0; raidID < num; raidID++) {
363 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
364 (RF_Raid_t *));
365 if (raidPtrs[raidID] == NULL) {
366 printf("raidPtrs[%d] is NULL\n", raidID);
367 }
368 }
369 }
370
371
372 int
373 raidsize(dev)
374 dev_t dev;
375 {
376 struct raid_softc *rs;
377 struct disklabel *lp;
378 int part, unit, omask, size;
379
380 unit = raidunit(dev);
381 if (unit >= numraid)
382 return (-1);
383 rs = &raid_softc[unit];
384
385 if ((rs->sc_flags & RAIDF_INITED) == 0)
386 return (-1);
387
388 part = DISKPART(dev);
389 omask = rs->sc_dkdev.dk_openmask & (1 << part);
390 lp = rs->sc_dkdev.dk_label;
391
392 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
393 return (-1);
394
395 if (lp->d_partitions[part].p_fstype != FS_SWAP)
396 size = -1;
397 else
398 size = lp->d_partitions[part].p_size *
399 (lp->d_secsize / DEV_BSIZE);
400
401 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
402 return (-1);
403
404 return (size);
405
406 }
407
/*
 * raiddump: crash-dump entry point.  Dumping kernel core to a RAID
 * set is not supported, so this always fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return ENXIO;
}
418 /* ARGSUSED */
/* ARGSUSED */
/*
 * raidopen: open entry point for both block and character devices.
 *
 * Takes the per-unit lock, (re)reads the disklabel on first open of a
 * configured unit, validates the partition, records the open in the
 * appropriate openmask, and marks all components dirty on the first
 * open so an unclean shutdown can be detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* The union of the char and block openmasks is the full mask. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
497 /* ARGSUSED */
498 int
499 raidclose(dev, flags, fmt, p)
500 dev_t dev;
501 int flags, fmt;
502 struct proc *p;
503 {
504 int unit = raidunit(dev);
505 struct raid_softc *rs;
506 int error = 0;
507 int part;
508
509 if (unit >= numraid)
510 return (ENXIO);
511 rs = &raid_softc[unit];
512
513 if ((error = raidlock(rs)) != 0)
514 return (error);
515
516 part = DISKPART(dev);
517
518 /* ...that much closer to allowing unconfiguration... */
519 switch (fmt) {
520 case S_IFCHR:
521 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
522 break;
523
524 case S_IFBLK:
525 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
526 break;
527 }
528 rs->sc_dkdev.dk_openmask =
529 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
530
531 if ((rs->sc_dkdev.dk_openmask == 0) &&
532 ((rs->sc_flags & RAIDF_INITED) != 0)) {
533 /* Last one... device is not unconfigured yet.
534 Device shutdown has taken care of setting the
535 clean bits if RAIDF_INITED is not set
536 mark things as clean... */
537 rf_update_component_labels( raidPtrs[unit] );
538 }
539
540 raidunlock(rs);
541 return (0);
542
543 }
544
545 void
546 raidstrategy(bp)
547 register struct buf *bp;
548 {
549 register int s;
550
551 unsigned int raidID = raidunit(bp->b_dev);
552 RF_Raid_t *raidPtr;
553 struct raid_softc *rs = &raid_softc[raidID];
554 struct disklabel *lp;
555 struct buf *dp;
556 int wlabel;
557
558 #if 0
559 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
560 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
561 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
562 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
563 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
564
565 if (bp->b_flags & B_READ)
566 db1_printf(("READ\n"));
567 else
568 db1_printf(("WRITE\n"));
569 #endif
570 if ((rs->sc_flags & RAIDF_INITED) ==0) {
571 bp->b_error = ENXIO;
572 bp->b_flags = B_ERROR;
573 bp->b_resid = bp->b_bcount;
574 biodone(bp);
575 return;
576 }
577 if (raidID >= numraid || !raidPtrs[raidID]) {
578 bp->b_error = ENODEV;
579 bp->b_flags |= B_ERROR;
580 bp->b_resid = bp->b_bcount;
581 biodone(bp);
582 return;
583 }
584 raidPtr = raidPtrs[raidID];
585 if (!raidPtr->valid) {
586 bp->b_error = ENODEV;
587 bp->b_flags |= B_ERROR;
588 bp->b_resid = bp->b_bcount;
589 biodone(bp);
590 return;
591 }
592 if (bp->b_bcount == 0) {
593 db1_printf(("b_bcount is zero..\n"));
594 biodone(bp);
595 return;
596 }
597 lp = rs->sc_dkdev.dk_label;
598
599 /*
600 * Do bounds checking and adjust transfer. If there's an
601 * error, the bounds check will flag that for us.
602 */
603
604 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
605 if (DISKPART(bp->b_dev) != RAW_PART)
606 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
607 db1_printf(("Bounds check failed!!:%d %d\n",
608 (int) bp->b_blkno, (int) wlabel));
609 biodone(bp);
610 return;
611 }
612 s = splbio();
613
614 bp->b_resid = 0;
615
616 /* stuff it onto our queue */
617
618 dp = &rs->buf_queue;
619 bp->b_actf = NULL;
620 bp->b_actb = dp->b_actb;
621 *dp->b_actb = bp;
622 dp->b_actb = &bp->b_actf;
623
624 raidstart(raidPtrs[raidID]);
625
626 splx(s);
627 }
628 /* ARGSUSED */
629 int
630 raidread(dev, uio, flags)
631 dev_t dev;
632 struct uio *uio;
633 int flags;
634 {
635 int unit = raidunit(dev);
636 struct raid_softc *rs;
637 int part;
638
639 if (unit >= numraid)
640 return (ENXIO);
641 rs = &raid_softc[unit];
642
643 if ((rs->sc_flags & RAIDF_INITED) == 0)
644 return (ENXIO);
645 part = DISKPART(dev);
646
647 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
648
649 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
650
651 }
652 /* ARGSUSED */
653 int
654 raidwrite(dev, uio, flags)
655 dev_t dev;
656 struct uio *uio;
657 int flags;
658 {
659 int unit = raidunit(dev);
660 struct raid_softc *rs;
661
662 if (unit >= numraid)
663 return (ENXIO);
664 rs = &raid_softc[unit];
665
666 if ((rs->sc_flags & RAIDF_INITED) == 0)
667 return (ENXIO);
668 db1_printf(("raidwrite\n"));
669 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
670
671 }
672
673 int
674 raidioctl(dev, cmd, data, flag, p)
675 dev_t dev;
676 u_long cmd;
677 caddr_t data;
678 int flag;
679 struct proc *p;
680 {
681 int unit = raidunit(dev);
682 int error = 0;
683 int part, pmask;
684 struct raid_softc *rs;
685 RF_Config_t *k_cfg, *u_cfg;
686 u_char *specific_buf;
687 int retcode = 0;
688 int row;
689 int column;
690 int s;
691 struct rf_recon_req *rrcopy, *rr;
692 RF_ComponentLabel_t *component_label;
693 RF_ComponentLabel_t ci_label;
694 RF_ComponentLabel_t **c_label_ptr;
695 RF_SingleComponent_t *sparePtr,*componentPtr;
696 RF_SingleComponent_t hot_spare;
697 RF_SingleComponent_t component;
698
699 if (unit >= numraid)
700 return (ENXIO);
701 rs = &raid_softc[unit];
702
703 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
704 (int) DISKPART(dev), (int) unit, (int) cmd));
705
706 /* Must be open for writes for these commands... */
707 switch (cmd) {
708 case DIOCSDINFO:
709 case DIOCWDINFO:
710 case DIOCWLABEL:
711 if ((flag & FWRITE) == 0)
712 return (EBADF);
713 }
714
715 /* Must be initialized for these... */
716 switch (cmd) {
717 case DIOCGDINFO:
718 case DIOCSDINFO:
719 case DIOCWDINFO:
720 case DIOCGPART:
721 case DIOCWLABEL:
722 case DIOCGDEFLABEL:
723 case RAIDFRAME_SHUTDOWN:
724 case RAIDFRAME_REWRITEPARITY:
725 case RAIDFRAME_GET_INFO:
726 case RAIDFRAME_RESET_ACCTOTALS:
727 case RAIDFRAME_GET_ACCTOTALS:
728 case RAIDFRAME_KEEP_ACCTOTALS:
729 case RAIDFRAME_GET_SIZE:
730 case RAIDFRAME_FAIL_DISK:
731 case RAIDFRAME_COPYBACK:
732 case RAIDFRAME_CHECKRECON:
733 case RAIDFRAME_GET_COMPONENT_LABEL:
734 case RAIDFRAME_SET_COMPONENT_LABEL:
735 case RAIDFRAME_ADD_HOT_SPARE:
736 case RAIDFRAME_REMOVE_HOT_SPARE:
737 case RAIDFRAME_INIT_LABELS:
738 case RAIDFRAME_REBUILD_IN_PLACE:
739 case RAIDFRAME_CHECK_PARITY:
740 if ((rs->sc_flags & RAIDF_INITED) == 0)
741 return (ENXIO);
742 }
743
744 switch (cmd) {
745
746
747 /* configure the system */
748 case RAIDFRAME_CONFIGURE:
749
750 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
751 /* copy-in the configuration information */
752 /* data points to a pointer to the configuration structure */
753 u_cfg = *((RF_Config_t **) data);
754 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
755 if (k_cfg == NULL) {
756 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
757 return (ENOMEM);
758 }
759 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
760 sizeof(RF_Config_t));
761 if (retcode) {
762 RF_Free(k_cfg, sizeof(RF_Config_t));
763 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
764 retcode));
765 return (retcode);
766 }
767 /* allocate a buffer for the layout-specific data, and copy it
768 * in */
769 if (k_cfg->layoutSpecificSize) {
770 if (k_cfg->layoutSpecificSize > 10000) {
771 /* sanity check */
772 RF_Free(k_cfg, sizeof(RF_Config_t));
773 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
774 return (EINVAL);
775 }
776 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
777 (u_char *));
778 if (specific_buf == NULL) {
779 RF_Free(k_cfg, sizeof(RF_Config_t));
780 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
781 return (ENOMEM);
782 }
783 retcode = copyin(k_cfg->layoutSpecific,
784 (caddr_t) specific_buf,
785 k_cfg->layoutSpecificSize);
786 if (retcode) {
787 RF_Free(k_cfg, sizeof(RF_Config_t));
788 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
789 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
790 retcode));
791 return (retcode);
792 }
793 } else
794 specific_buf = NULL;
795 k_cfg->layoutSpecific = specific_buf;
796
797 /* should do some kind of sanity check on the configuration.
798 * Store the sum of all the bytes in the last byte? */
799
800 /* configure the system */
801
802 raidPtrs[unit]->raidid = unit;
803
804 retcode = rf_Configure(raidPtrs[unit], k_cfg);
805
806 /* allow this many simultaneous IO's to this RAID device */
807 raidPtrs[unit]->openings = RAIDOUTSTANDING;
808
809 if (retcode == 0) {
810 retcode = raidinit(dev, raidPtrs[unit], unit);
811 rf_markalldirty( raidPtrs[unit] );
812 }
813 /* free the buffers. No return code here. */
814 if (k_cfg->layoutSpecificSize) {
815 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
816 }
817 RF_Free(k_cfg, sizeof(RF_Config_t));
818
819 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
820 retcode));
821
822 return (retcode);
823
824 /* shutdown the system */
825 case RAIDFRAME_SHUTDOWN:
826
827 if ((error = raidlock(rs)) != 0)
828 return (error);
829
830 /*
831 * If somebody has a partition mounted, we shouldn't
832 * shutdown.
833 */
834
835 part = DISKPART(dev);
836 pmask = (1 << part);
837 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
838 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
839 (rs->sc_dkdev.dk_copenmask & pmask))) {
840 raidunlock(rs);
841 return (EBUSY);
842 }
843
844 if (rf_debugKernelAccess) {
845 printf("call shutdown\n");
846 }
847
848 retcode = rf_Shutdown(raidPtrs[unit]);
849
850 db1_printf(("Done main shutdown\n"));
851
852 pool_destroy(&rs->sc_cbufpool);
853 db1_printf(("Done freeing component buffer freelist\n"));
854
855 /* It's no longer initialized... */
856 rs->sc_flags &= ~RAIDF_INITED;
857
858 /* Detach the disk. */
859 disk_detach(&rs->sc_dkdev);
860
861 raidunlock(rs);
862
863 return (retcode);
864 case RAIDFRAME_GET_COMPONENT_LABEL:
865 c_label_ptr = (RF_ComponentLabel_t **) data;
866 /* need to read the component label for the disk indicated
867 by row,column in component_label
868 XXX need to sanity check these values!!!
869 */
870
871 /* For practice, let's get it directly fromdisk, rather
872 than from the in-core copy */
873 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
874 (RF_ComponentLabel_t *));
875 if (component_label == NULL)
876 return (ENOMEM);
877
878 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
879
880 retcode = copyin( *c_label_ptr, component_label,
881 sizeof(RF_ComponentLabel_t));
882
883 if (retcode) {
884 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
885 return(retcode);
886 }
887
888 row = component_label->row;
889 column = component_label->column;
890
891 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
892 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
893 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
894 return(EINVAL);
895 }
896
897 raidread_component_label(
898 raidPtrs[unit]->Disks[row][column].dev,
899 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
900 component_label );
901
902 retcode = copyout((caddr_t) component_label,
903 (caddr_t) *c_label_ptr,
904 sizeof(RF_ComponentLabel_t));
905 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
906 return (retcode);
907
908 case RAIDFRAME_SET_COMPONENT_LABEL:
909 component_label = (RF_ComponentLabel_t *) data;
910
911 /* XXX check the label for valid stuff... */
912 /* Note that some things *should not* get modified --
913 the user should be re-initing the labels instead of
914 trying to patch things.
915 */
916
917 printf("Got component label:\n");
918 printf("Version: %d\n",component_label->version);
919 printf("Serial Number: %d\n",component_label->serial_number);
920 printf("Mod counter: %d\n",component_label->mod_counter);
921 printf("Row: %d\n", component_label->row);
922 printf("Column: %d\n", component_label->column);
923 printf("Num Rows: %d\n", component_label->num_rows);
924 printf("Num Columns: %d\n", component_label->num_columns);
925 printf("Clean: %d\n", component_label->clean);
926 printf("Status: %d\n", component_label->status);
927
928 row = component_label->row;
929 column = component_label->column;
930
931 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
932 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
933 return(EINVAL);
934 }
935
936 /* XXX this isn't allowed to do anything for now :-) */
937 #if 0
938 raidwrite_component_label(
939 raidPtrs[unit]->Disks[row][column].dev,
940 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
941 component_label );
942 #endif
943 return (0);
944
945 case RAIDFRAME_INIT_LABELS:
946 component_label = (RF_ComponentLabel_t *) data;
947 /*
948 we only want the serial number from
949 the above. We get all the rest of the information
950 from the config that was used to create this RAID
951 set.
952 */
953
954 raidPtrs[unit]->serial_number = component_label->serial_number;
955 /* current version number */
956 ci_label.version = RF_COMPONENT_LABEL_VERSION;
957 ci_label.serial_number = component_label->serial_number;
958 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
959 ci_label.num_rows = raidPtrs[unit]->numRow;
960 ci_label.num_columns = raidPtrs[unit]->numCol;
961 ci_label.clean = RF_RAID_DIRTY; /* not clean */
962 ci_label.status = rf_ds_optimal; /* "It's good!" */
963
964 for(row=0;row<raidPtrs[unit]->numRow;row++) {
965 ci_label.row = row;
966 for(column=0;column<raidPtrs[unit]->numCol;column++) {
967 ci_label.column = column;
968 raidwrite_component_label(
969 raidPtrs[unit]->Disks[row][column].dev,
970 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
971 &ci_label );
972 }
973 }
974
975 return (retcode);
976
977 /* initialize all parity */
978 case RAIDFRAME_REWRITEPARITY:
979
980 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
981 /* Parity for RAID 0 is trivially correct */
982 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
983 return(0);
984 }
985
986 /* borrow the thread of the requesting process */
987
988 s = splbio();
989 retcode = rf_RewriteParity(raidPtrs[unit]);
990 splx(s);
991 /* return I/O Error if the parity rewrite fails */
992
993 if (retcode) {
994 retcode = EIO;
995 } else {
996 /* set the clean bit! If we shutdown correctly,
997 the clean bit on each component label will get
998 set */
999 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1000 }
1001 return (retcode);
1002
1003
1004 case RAIDFRAME_ADD_HOT_SPARE:
1005 sparePtr = (RF_SingleComponent_t *) data;
1006 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1007 printf("Adding spare\n");
1008 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1009 return(retcode);
1010
1011 case RAIDFRAME_REMOVE_HOT_SPARE:
1012 return(retcode);
1013
1014 case RAIDFRAME_REBUILD_IN_PLACE:
1015
1016 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1017 /* Can't do this on a RAID 0!! */
1018 return(EINVAL);
1019 }
1020
1021 componentPtr = (RF_SingleComponent_t *) data;
1022 memcpy( &component, componentPtr,
1023 sizeof(RF_SingleComponent_t));
1024 row = component.row;
1025 column = component.column;
1026 printf("Rebuild: %d %d\n",row, column);
1027 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1028 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1029 return(EINVAL);
1030 }
1031 printf("Attempting a rebuild in place\n");
1032 s = splbio();
1033 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1034 splx(s);
1035 return(retcode);
1036
1037 case RAIDFRAME_GET_INFO:
1038 {
1039 RF_Raid_t *raid = raidPtrs[unit];
1040 RF_DeviceConfig_t *cfg, **ucfgp;
1041 int i, j, d;
1042
1043 if (!raid->valid)
1044 return (ENODEV);
1045 ucfgp = (RF_DeviceConfig_t **) data;
1046 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1047 (RF_DeviceConfig_t *));
1048 if (cfg == NULL)
1049 return (ENOMEM);
1050 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1051 cfg->rows = raid->numRow;
1052 cfg->cols = raid->numCol;
1053 cfg->ndevs = raid->numRow * raid->numCol;
1054 if (cfg->ndevs >= RF_MAX_DISKS) {
1055 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1056 return (ENOMEM);
1057 }
1058 cfg->nspares = raid->numSpare;
1059 if (cfg->nspares >= RF_MAX_DISKS) {
1060 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1061 return (ENOMEM);
1062 }
1063 cfg->maxqdepth = raid->maxQueueDepth;
1064 d = 0;
1065 for (i = 0; i < cfg->rows; i++) {
1066 for (j = 0; j < cfg->cols; j++) {
1067 cfg->devs[d] = raid->Disks[i][j];
1068 d++;
1069 }
1070 }
1071 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1072 cfg->spares[i] = raid->Disks[0][j];
1073 }
1074 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1075 sizeof(RF_DeviceConfig_t));
1076 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1077
1078 return (retcode);
1079 }
1080 break;
1081 case RAIDFRAME_CHECK_PARITY:
1082 *(int *) data = raidPtrs[unit]->parity_good;
1083 return (0);
1084 case RAIDFRAME_RESET_ACCTOTALS:
1085 {
1086 RF_Raid_t *raid = raidPtrs[unit];
1087
1088 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1089 return (0);
1090 }
1091 break;
1092
1093 case RAIDFRAME_GET_ACCTOTALS:
1094 {
1095 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1096 RF_Raid_t *raid = raidPtrs[unit];
1097
1098 *totals = raid->acc_totals;
1099 return (0);
1100 }
1101 break;
1102
1103 case RAIDFRAME_KEEP_ACCTOTALS:
1104 {
1105 RF_Raid_t *raid = raidPtrs[unit];
1106 int *keep = (int *) data;
1107
1108 raid->keep_acc_totals = *keep;
1109 return (0);
1110 }
1111 break;
1112
1113 case RAIDFRAME_GET_SIZE:
1114 *(int *) data = raidPtrs[unit]->totalSectors;
1115 return (0);
1116
1117 #define RAIDFRAME_RECON 1
1118 /* XXX The above should probably be set somewhere else!! GO */
1119 #if RAIDFRAME_RECON > 0
1120
1121 /* fail a disk & optionally start reconstruction */
1122 case RAIDFRAME_FAIL_DISK:
1123
1124 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1125 /* Can't do this on a RAID 0!! */
1126 return(EINVAL);
1127 }
1128
1129 rr = (struct rf_recon_req *) data;
1130
1131 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1132 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1133 return (EINVAL);
1134
1135 printf("raid%d: Failing the disk: row: %d col: %d\n",
1136 unit, rr->row, rr->col);
1137
1138 /* make a copy of the recon request so that we don't rely on
1139 * the user's buffer */
1140 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1141 bcopy(rr, rrcopy, sizeof(*rr));
1142 rrcopy->raidPtr = (void *) raidPtrs[unit];
1143
1144 LOCK_RECON_Q_MUTEX();
1145 rrcopy->next = recon_queue;
1146 recon_queue = rrcopy;
1147 wakeup(&recon_queue);
1148 UNLOCK_RECON_Q_MUTEX();
1149
1150 return (0);
1151
1152 /* invoke a copyback operation after recon on whatever disk
1153 * needs it, if any */
1154 case RAIDFRAME_COPYBACK:
1155
1156 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1157 /* This makes no sense on a RAID 0!! */
1158 return(EINVAL);
1159 }
1160
1161 /* borrow the current thread to get this done */
1162
1163 s = splbio();
1164 rf_CopybackReconstructedData(raidPtrs[unit]);
1165 splx(s);
1166 return (0);
1167
1168 /* return the percentage completion of reconstruction */
1169 case RAIDFRAME_CHECKRECON:
1170 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1171 /* This makes no sense on a RAID 0 */
1172 return(EINVAL);
1173 }
1174
1175 row = *(int *) data;
1176 if (row < 0 || row >= raidPtrs[unit]->numRow)
1177 return (EINVAL);
1178 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1179 *(int *) data = 100;
1180 else
1181 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1182 return (0);
1183
1184 /* the sparetable daemon calls this to wait for the kernel to
1185 * need a spare table. this ioctl does not return until a
1186 * spare table is needed. XXX -- calling mpsleep here in the
1187 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1188 * -- I should either compute the spare table in the kernel,
1189 * or have a different -- XXX XXX -- interface (a different
1190 * character device) for delivering the table -- XXX */
1191 #if 0
1192 case RAIDFRAME_SPARET_WAIT:
1193 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1194 while (!rf_sparet_wait_queue)
1195 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1196 waitreq = rf_sparet_wait_queue;
1197 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1198 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1199
1200 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1201
1202 RF_Free(waitreq, sizeof(*waitreq));
1203 return (0);
1204
1205
1206 /* wakes up a process waiting on SPARET_WAIT and puts an error
1207 * code in it that will cause the dameon to exit */
1208 case RAIDFRAME_ABORT_SPARET_WAIT:
1209 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1210 waitreq->fcol = -1;
1211 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1212 waitreq->next = rf_sparet_wait_queue;
1213 rf_sparet_wait_queue = waitreq;
1214 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1215 wakeup(&rf_sparet_wait_queue);
1216 return (0);
1217
1218 /* used by the spare table daemon to deliver a spare table
1219 * into the kernel */
1220 case RAIDFRAME_SEND_SPARET:
1221
1222 /* install the spare table */
1223 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1224
1225 /* respond to the requestor. the return status of the spare
1226 * table installation is passed in the "fcol" field */
1227 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1228 waitreq->fcol = retcode;
1229 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1230 waitreq->next = rf_sparet_resp_queue;
1231 rf_sparet_resp_queue = waitreq;
1232 wakeup(&rf_sparet_resp_queue);
1233 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1234
1235 return (retcode);
1236 #endif
1237
1238
1239 #endif /* RAIDFRAME_RECON > 0 */
1240
1241 default:
1242 break; /* fall through to the os-specific code below */
1243
1244 }
1245
1246 if (!raidPtrs[unit]->valid)
1247 return (EINVAL);
1248
1249 /*
1250 * Add support for "regular" device ioctls here.
1251 */
1252
1253 switch (cmd) {
1254 case DIOCGDINFO:
1255 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1256 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1257 break;
1258
1259 case DIOCGPART:
1260 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1261 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1262 ((struct partinfo *) data)->part =
1263 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1264 break;
1265
1266 case DIOCWDINFO:
1267 db1_printf(("DIOCWDINFO\n"));
1268 case DIOCSDINFO:
1269 db1_printf(("DIOCSDINFO\n"));
1270 if ((error = raidlock(rs)) != 0)
1271 return (error);
1272
1273 rs->sc_flags |= RAIDF_LABELLING;
1274
1275 error = setdisklabel(rs->sc_dkdev.dk_label,
1276 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1277 if (error == 0) {
1278 if (cmd == DIOCWDINFO)
1279 error = writedisklabel(RAIDLABELDEV(dev),
1280 raidstrategy, rs->sc_dkdev.dk_label,
1281 rs->sc_dkdev.dk_cpulabel);
1282 }
1283 rs->sc_flags &= ~RAIDF_LABELLING;
1284
1285 raidunlock(rs);
1286
1287 if (error)
1288 return (error);
1289 break;
1290
1291 case DIOCWLABEL:
1292 db1_printf(("DIOCWLABEL\n"));
1293 if (*(int *) data != 0)
1294 rs->sc_flags |= RAIDF_WLABEL;
1295 else
1296 rs->sc_flags &= ~RAIDF_WLABEL;
1297 break;
1298
1299 case DIOCGDEFLABEL:
1300 db1_printf(("DIOCGDEFLABEL\n"));
1301 raidgetdefaultlabel(raidPtrs[unit], rs,
1302 (struct disklabel *) data);
1303 break;
1304
1305 default:
1306 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1307 }
1308 return (retcode);
1309
1310 }
1311
1312
/*
 * raidinit -- complete the rest of the kernel-side initialization for
 * the RAIDframe device `unit': create the per-unit raidbuf pool, mark
 * the softc as initialized, name the device, and attach the disk so
 * disklabels can be read/written.  Always returns 0 (retcode is never
 * set to anything else in this function).
 */
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of struct raidbuf wrappers handed out by RAIDGETBUF() in
	 * rf_DispatchKernelIO and returned by RAIDPUTBUF() */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1356
/*
 * rf_ReconKernelThread -- kernel thread that services disk-failure /
 * reconstruction requests queued (via recon_queue) by the
 * RAIDFRAME_FAIL_DISK ioctl.  It is created once and persists until
 * the system reboots; it never exits.
 */

void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* drop the lock while sleeping; retake it before
			 * re-testing the queue */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the ioctl handler malloc'd this copy; we own and free it */
		RF_Free(req, sizeof(*req));
	}
}
/*
 * rf_GetSpareTableFromDaemon -- wake up the user-level sparetable
 * daemon (blocked in RAIDFRAME_SPARET_WAIT) and sleep until it
 * delivers a spare table via RAIDFRAME_SEND_SPARET.  Returns the
 * installation status that the daemon placed in the response's fcol
 * field.
 *
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* enqueue our request and poke the daemon */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): unlike the Mach mpsleep() the comment above refers
	 * to, tsleep() does NOT release rf_sparet_wait_mutex -- confirm the
	 * responder can still take the mutex, or this can deadlock. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* pop the daemon's response; note that `req' now points at the
	 * response structure, not our original request */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	/* invariant: raidPtr->mutex is held at the top of each loop test
	 * and released while one buffer is being processed */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the doubly-linked b_actf/b_actb queue
		 * rooted at rs->buf_queue */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* whole sectors requested; pb is 1 when the byte count is
		 * not an exact multiple of the sector size */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the extra
		 * (sum < x) tests catch arithmetic wraparound of `sum' */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject requests that are not whole sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this I/O */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode is never examined here -- presumably
		 * rf_DoAccess completes the buffer itself on failure; verify. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1555
1556
1557
1558
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */

/*
 * rf_DispatchKernelIO -- issue the disk-queue request `req' against
 * its component: for READ/WRITE, build a buf via InitBP() and hand it
 * to VOP_STRATEGY; for NOP, immediately invoke the completion callback.
 * Completion is reported asynchronously through KernelWakeupFunc().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left in the original buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* wrapper buf from the per-unit pool created in raidinit() */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no real device I/O is issued */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		/* start the per-request timer, if tracing this access */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* fill in raidbp->rf_buf; KernelWakeupFunc runs at biodone */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
/*
 * KernelWakeupFunc -- biodone() callback for I/Os issued by
 * rf_DispatchKernelIO.  Propagates error/resid state from the wrapper
 * buf back to the original buf, updates trace timers, marks a component
 * failed on I/O error, returns the wrapper to the pool, and finally
 * notifies the disk queue and the request's completion function.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* the wrapper raidbuf begins with its embedded buf, so the cast
	 * recovers the raidbuf from the buf pointer biodone handed us */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any error from the component I/O to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account the physical I/O time in the trace record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the wrapper buf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell the disk queue, then the requester, whether we failed */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1776
1777
1778
1779 /*
1780 * initialize a buf structure for doing an I/O in the kernel.
1781 */
1782 static void
1783 InitBP(
1784 struct buf * bp,
1785 struct vnode * b_vp,
1786 unsigned rw_flag,
1787 dev_t dev,
1788 RF_SectorNum_t startSect,
1789 RF_SectorCount_t numSect,
1790 caddr_t buf,
1791 void (*cbFunc) (struct buf *),
1792 void *cbArg,
1793 int logBytesPerSector,
1794 struct proc * b_proc)
1795 {
1796 /* bp->b_flags = B_PHYS | rw_flag; */
1797 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1798 bp->b_bcount = numSect << logBytesPerSector;
1799 bp->b_bufsize = bp->b_bcount;
1800 bp->b_error = 0;
1801 bp->b_dev = dev;
1802 db1_printf(("bp->b_dev is %d\n", dev));
1803 bp->b_un.b_addr = buf;
1804 #if 0
1805 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1806 #endif
1807 bp->b_blkno = startSect;
1808 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1809 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1810 if (bp->b_bcount == 0) {
1811 panic("bp->b_bcount is zero in InitBP!!\n");
1812 }
1813 bp->b_proc = b_proc;
1814 bp->b_iodone = cbFunc;
1815 bp->b_vp = b_vp;
1816
1817 }
1818
1819 static void
1820 raidgetdefaultlabel(raidPtr, rs, lp)
1821 RF_Raid_t *raidPtr;
1822 struct raid_softc *rs;
1823 struct disklabel *lp;
1824 {
1825 db1_printf(("Building a default label...\n"));
1826 bzero(lp, sizeof(*lp));
1827
1828 /* fabricate a label... */
1829 lp->d_secperunit = raidPtr->totalSectors;
1830 lp->d_secsize = raidPtr->bytesPerSector;
1831 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1832 lp->d_ntracks = 1;
1833 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1834 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1835
1836 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1837 lp->d_type = DTYPE_RAID;
1838 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1839 lp->d_rpm = 3600;
1840 lp->d_interleave = 1;
1841 lp->d_flags = 0;
1842
1843 lp->d_partitions[RAW_PART].p_offset = 0;
1844 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1845 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1846 lp->d_npartitions = RAW_PART + 1;
1847
1848 lp->d_magic = DISKMAGIC;
1849 lp->d_magic2 = DISKMAGIC;
1850 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1851
1852 }
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default label, then try the real one */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 * readdisklabel returns NULL on success, an error string otherwise.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no valid on-disk label: install the default one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1910 /*
1911 * Take care of things one might want to take care of in the event
1912 * that a disklabel isn't present.
1913 */
1914 static void
1915 raidmakedisklabel(rs)
1916 struct raid_softc *rs;
1917 {
1918 struct disklabel *lp = rs->sc_dkdev.dk_label;
1919 db1_printf(("Making a label..\n"));
1920
1921 /*
1922 * For historical reasons, if there's no disklabel present
1923 * the raw partition must be marked FS_BSDFFS.
1924 */
1925
1926 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1927
1928 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1929
1930 lp->d_checksum = dkcksum(lp);
1931 }
/*
 * Lookup the provided name in the filesystem. If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 *
 * Returns 0 on success (vnode returned unlocked but open/referenced),
 * or an errno: vn_open's error, EBUSY if in use, ENOTBLK if not a
 * block device.
 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	/* open the path read/write; on success the vnode is locked */
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	/* refuse a component that is already in use elsewhere */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* success: hand back the vnode unlocked but still open */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
1977 /*
1978 * Wait interruptibly for an exclusive lock.
1979 *
1980 * XXX
1981 * Several drivers do this; it should be abstracted and made MP-safe.
1982 * (Hmm... where have we seen this warning before :-> GO )
1983 */
1984 static int
1985 raidlock(rs)
1986 struct raid_softc *rs;
1987 {
1988 int error;
1989
1990 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1991 rs->sc_flags |= RAIDF_WANTED;
1992 if ((error =
1993 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1994 return (error);
1995 }
1996 rs->sc_flags |= RAIDF_LOCKED;
1997 return (0);
1998 }
1999 /*
2000 * Unlock and wake up any waiters.
2001 */
2002 static void
2003 raidunlock(rs)
2004 struct raid_softc *rs;
2005 {
2006
2007 rs->sc_flags &= ~RAIDF_LOCKED;
2008 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2009 rs->sc_flags &= ~RAIDF_WANTED;
2010 wakeup(rs);
2011 }
2012 }
2013
2014
2015 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2016 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2017
2018 int
2019 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2020 {
2021 RF_ComponentLabel_t component_label;
2022 raidread_component_label(dev, b_vp, &component_label);
2023 component_label.mod_counter = mod_counter;
2024 component_label.clean = RF_RAID_CLEAN;
2025 raidwrite_component_label(dev, b_vp, &component_label);
2026 return(0);
2027 }
2028
2029
2030 int
2031 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2032 {
2033 RF_ComponentLabel_t component_label;
2034 raidread_component_label(dev, b_vp, &component_label);
2035 component_label.mod_counter = mod_counter;
2036 component_label.clean = RF_RAID_DIRTY;
2037 raidwrite_component_label(dev, b_vp, &component_label);
2038 return(0);
2039 }
2040
/*
 * raidread_component_label -- read the RAIDframe component label stored
 * at byte offset RF_COMPONENT_INFO_OFFSET on the given component device
 * into *component_label.  Returns 0 on success, or the error from
 * biowait().  The b_vp argument is unused here.
 */
/* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly via the device's strategy routine */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the scratch buffer so its contents aren't reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/*
 * raidwrite_component_label -- write *component_label to the fixed
 * component-label area (RF_COMPONENT_INFO_OFFSET) of the given
 * component device, zero-padding the rest of the sector(s).  Returns
 * 0 on success, or the error from biowait().  The b_vp argument is
 * unused here.
 */
/* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole area, then drop the label at its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly via the device's strategy routine */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the scratch buffer so its contents aren't reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2125
/*
 * Mark the on-disk component labels of all non-failed components dirty.
 *
 * The array's mod_counter is bumped once, and then every component that
 * is not failed has its label re-stamped via raidmarkdirty() with the
 * new counter.  Components whose label says rf_ds_spared are skipped
 * entirely -- their labels must not be touched (see the XXX below).
 *
 * NOTE(review): the trailing #if 0 region is dormant spare-handling
 * code; it references variables (sparecol, srow, scol, i, j) that are
 * not declared in this function, and its raidmarkclean() call takes
 * fewer arguments than the live calls elsewhere -- it would need work
 * before it could be enabled.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;	/* scratch label, re-read per component */
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* Failed components are left alone entirely. */
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					   but whatever you do, don't
					   try to access it!! */
				} else {
#if 0
					/* dormant: would also sync the label's
					   status field with the in-core state */
					c_label.status =
						raidPtr->Disks[r][c].status;
					raidwrite_component_label(
						raidPtr->Disks[r][c].dev,
						raidPtr->raid_cinfo[r][c].ci_vp,
						&c_label);
#endif
					/* Stamp the label dirty with the new
					   mod_counter. */
					raidmarkdirty(
						raidPtr->Disks[r][c].dev,
						raidPtr->raid_cinfo[r][c].ci_vp,
						raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	/* Dormant spare-relabeling pass -- does not compile as-is (see
	   NOTE(review) in the function header). */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				       raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2220
2221
/*
 * Rewrite the on-disk component labels to reflect the array's current
 * in-core state.
 *
 * The mod_counter is bumped once.  Each optimal component's label is
 * read back, stamped rf_ds_optimal, and written out; if the parity is
 * known good (parity_good == RF_RAID_CLEAN) the component is also
 * marked clean with the new counter.  Used spares then receive fully
 * rebuilt labels describing the row/column they now substitute for, so
 * they appear as first-class components at the next configuration.
 *
 * XXX as noted below, "clean" is asserted without re-verifying it.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;	/* scratch label, reused per component */
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 == "no mapping found" fallback, written into the spare's
	   label if the search loop below never matches. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* Read-modify-write: preserve the rest of the
				   label, refresh only the status field. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			/* dormant alternate branch: would also refresh labels
			   of non-optimal (but non-failed) components */
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		/* Spare disks sit past the regular columns; this code
		   only looks at row 0 for them (spareRow is compared
		   against 0 below) -- presumably spares live in row 0;
		   TODO confirm against the spare-allocation code. */
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which (row, col) this spare is standing in
			   for, so its label can claim that position. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			/* Read the old label, then rebuild every field to
			   describe the position the spare now occupies. */
			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					 raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2338