Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.29.8.1
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.29.8.1 1999/12/21 23:19:54 wrstuden Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_acctrace.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_debugMem.h"
    147 #include "rf_kintf.h"
    148 #include "rf_options.h"
    149 #include "rf_driver.h"
    150 #include "rf_parityscan.h"
    151 #include "rf_debugprint.h"
    152 #include "rf_threadstuff.h"
    153 
int     rf_kdebug_level = 0;	/* runtime-tunable verbosity for the dbN_printf() macros below */

/* States for rf_kbooted: has rf_BootRaidframe() been run, and did it work? */
#define RFK_BOOT_NONE 0		/* engine not booted yet */
#define RFK_BOOT_GOOD 1		/* engine booted successfully */
#define RFK_BOOT_BAD  2		/* boot was attempted and failed */
static int rf_kbooted = RFK_BOOT_NONE;

#ifdef DEBUG
/* dbN_printf(a): print when rf_kdebug_level > N-1.  'a' is a fully
 * parenthesized printf argument list, e.g. db1_printf(("x=%d\n", x)). */
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
/* Non-DEBUG kernels: level 0 still prints, all higher levels compile away. */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
    177 
static RF_Raid_t **raidPtrs;	/* global raid device descriptors,
				 * sized by raidattach() */

/* Protects the two spare-table queues immediately below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */


/* Serializes all access to recon_queue. */
decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
    195 
    196 /* prototypes */
    197 static void KernelWakeupFunc(struct buf * bp);
    198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    199 		   dev_t dev, RF_SectorNum_t startSect,
    200 		   RF_SectorCount_t numSect, caddr_t buf,
    201 		   void (*cbFunc) (struct buf *), void *cbArg,
    202 		   int logBytesPerSector, struct proc * b_proc);
    203 
    204 #define Dprintf0(s)       if (rf_queueDebug) \
    205      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    206 #define Dprintf1(s,a)     if (rf_queueDebug) \
    207      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
    208 #define Dprintf2(s,a,b)   if (rf_queueDebug) \
    209      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
    210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
    211      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
    212 
    213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
    214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
    215 
    216 void raidattach __P((int));
    217 int raidsize __P((dev_t));
    218 
    219 void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
    220 void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
    221 static int raidinit __P((dev_t, RF_Raid_t *, int));
    222 
    223 int raidopen __P((dev_t, int, int, struct proc *));
    224 int raidclose __P((dev_t, int, int, struct proc *));
    225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    226 int raidwrite __P((dev_t, struct uio *, int));
    227 int raidread __P((dev_t, struct uio *, int));
    228 void raidstrategy __P((struct buf *));
    229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    230 
    231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
    233 void rf_update_component_labels( RF_Raid_t *);
/*
 * Pilfered from ccd.c
 */

/* Per-component I/O bookkeeping: one of these wraps each struct buf
 * issued to a component disk on behalf of an original RAID request. */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Allocate/release a struct raidbuf from the unit's component buffer pool.
 * PR_NOWAIT means RAIDGETBUF may return NULL; callers must handle that. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    248 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/* Software state, one per configured RAID unit (parallel to raidPtrs[]). */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the unit number from a raid device's dev_t. */
#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* units configured; set by raidattach() */
    270 
    271 /*
    272  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    273  * Be aware that large numbers can allow the driver to consume a lot of
    274  * kernel memory, especially on writes, and in degraded mode reads.
    275  *
    276  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    277  * a single 64K write will typically require 64K for the old data,
    278  * 64K for the old parity, and 64K for the new parity, for a total
    279  * of 192K (if the parity buffer is not re-used immediately).
    280  * Even it if is used immedately, that's still 128K, which when multiplied
    281  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    282  *
    283  * Now in degraded mode, for example, a 64K read on the above setup may
    284  * require data reconstruction, which will require *all* of the 4 remaining
    285  * disks to participate -- 4 * 32K/disk == 128K again.
    286  */
    287 
    288 #ifndef RAIDOUTSTANDING
    289 #define RAIDOUTSTANDING   6
    290 #endif
    291 
    292 #define RAIDLABELDEV(dev)	\
    293 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    294 
    295 /* declared here, and made public, for the benefit of KVM stuff.. */
    296 struct raid_softc *raid_softc;
    297 
    298 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    299 				     struct disklabel *));
    300 static void raidgetdisklabel __P((dev_t));
    301 static void raidmakedisklabel __P((struct raid_softc *));
    302 
    303 static int raidlock __P((struct raid_softc *));
    304 static void raidunlock __P((struct raid_softc *));
    305 int raidlookup __P((char *, struct proc * p, struct vnode **));
    306 
    307 static void rf_markalldirty __P((RF_Raid_t *));
    308 
    309 void
    310 raidattach(num)
    311 	int     num;
    312 {
    313 	int raidID;
    314 	int i, rc;
    315 
    316 #ifdef DEBUG
    317 	printf("raidattach: Asked for %d units\n", num);
    318 #endif
    319 
    320 	if (num <= 0) {
    321 #ifdef DIAGNOSTIC
    322 		panic("raidattach: count <= 0");
    323 #endif
    324 		return;
    325 	}
    326 	/* This is where all the initialization stuff gets done. */
    327 
    328 	/* Make some space for requested number of units... */
    329 
    330 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    331 	if (raidPtrs == NULL) {
    332 		panic("raidPtrs is NULL!!\n");
    333 	}
    334 
    335 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    336 	if (rc) {
    337 		RF_PANIC();
    338 	}
    339 
    340 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    341 	recon_queue = NULL;
    342 
    343 	for (i = 0; i < numraid; i++)
    344 		raidPtrs[i] = NULL;
    345 	rc = rf_BootRaidframe();
    346 	if (rc == 0)
    347 		printf("Kernelized RAIDframe activated\n");
    348 	else
    349 		panic("Serious error booting RAID!!\n");
    350 
    351 	rf_kbooted = RFK_BOOT_GOOD;
    352 
    353 	/* put together some datastructures like the CCD device does.. This
    354 	 * lets us lock the device and what-not when it gets opened. */
    355 
    356 	raid_softc = (struct raid_softc *)
    357 	    malloc(num * sizeof(struct raid_softc),
    358 	    M_RAIDFRAME, M_NOWAIT);
    359 	if (raid_softc == NULL) {
    360 		printf("WARNING: no memory for RAIDframe driver\n");
    361 		return;
    362 	}
    363 	numraid = num;
    364 	bzero(raid_softc, num * sizeof(struct raid_softc));
    365 
    366 	for (raidID = 0; raidID < num; raidID++) {
    367 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    368 			  (RF_Raid_t *));
    369 		if (raidPtrs[raidID] == NULL) {
    370 			printf("raidPtrs[%d] is NULL\n", raidID);
    371 		}
    372 	}
    373 }
    374 
    375 
    376 int
    377 raidsize(dev)
    378 	dev_t   dev;
    379 {
    380 	struct raid_softc *rs;
    381 	struct disklabel *lp;
    382 	int     part, unit, omask, size;
    383 
    384 	unit = raidunit(dev);
    385 	if (unit >= numraid)
    386 		return (-1);
    387 	rs = &raid_softc[unit];
    388 
    389 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    390 		return (-1);
    391 
    392 	part = DISKPART(dev);
    393 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    394 	lp = rs->sc_dkdev.dk_label;
    395 
    396 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    397 		return (-1);
    398 
    399 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    400 		size = -1;
    401 	else
    402 		size = lp->d_partitions[part].p_size *
    403 		    (lp->d_secsize / DEV_BSIZE);
    404 
    405 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    406 		return (-1);
    407 
    408 	return (size);
    409 
    410 }
    411 
    412 int
    413 raiddump(dev, blkno, va, size)
    414 	dev_t   dev;
    415 	daddr_t blkno;
    416 	caddr_t va;
    417 	size_t  size;
    418 {
    419 	/* Not implemented. */
    420 	return ENXIO;
    421 }
    422 /* ARGSUSED */
/*
 * raidopen: open entry point.
 *
 * Locks the unit, validates the requested partition, records the open
 * in the per-mode openmask, and on the first open of a configured unit
 * marks all components dirty so that an unclean shutdown can be
 * detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Exclusive access while we manipulate openmasks and labels. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* Re-read the disklabel on the very first open of a configured unit. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Non-raw partitions require a configured unit and a
		 * label entry that is in range and actually in use. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Combined openmask is the union of the char and block masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
    501 /* ARGSUSED */
    502 int
    503 raidclose(dev, flags, fmt, p)
    504 	dev_t   dev;
    505 	int     flags, fmt;
    506 	struct proc *p;
    507 {
    508 	int     unit = raidunit(dev);
    509 	struct raid_softc *rs;
    510 	int     error = 0;
    511 	int     part;
    512 
    513 	if (unit >= numraid)
    514 		return (ENXIO);
    515 	rs = &raid_softc[unit];
    516 
    517 	if ((error = raidlock(rs)) != 0)
    518 		return (error);
    519 
    520 	part = DISKPART(dev);
    521 
    522 	/* ...that much closer to allowing unconfiguration... */
    523 	switch (fmt) {
    524 	case S_IFCHR:
    525 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    526 		break;
    527 
    528 	case S_IFBLK:
    529 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    530 		break;
    531 	}
    532 	rs->sc_dkdev.dk_openmask =
    533 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    534 
    535 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    536 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    537 		/* Last one... device is not unconfigured yet.
    538 		   Device shutdown has taken care of setting the
    539 		   clean bits if RAIDF_INITED is not set
    540 		   mark things as clean... */
    541 		rf_update_component_labels( raidPtrs[unit] );
    542 	}
    543 
    544 	raidunlock(rs);
    545 	return (0);
    546 
    547 }
    548 
    549 void
    550 raidstrategy(bp)
    551 	register struct buf *bp;
    552 {
    553 	register int s;
    554 
    555 	unsigned int raidID = raidunit(bp->b_dev);
    556 	RF_Raid_t *raidPtr;
    557 	struct raid_softc *rs = &raid_softc[raidID];
    558 	struct disklabel *lp;
    559 	int     wlabel;
    560 
    561 #if 0
    562 	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
    563 	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
    564 	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
    565 	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
    566 	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
    567 
    568 	if (bp->b_flags & B_READ)
    569 		db1_printf(("READ\n"));
    570 	else
    571 		db1_printf(("WRITE\n"));
    572 #endif
    573 	if (rf_kbooted != RFK_BOOT_GOOD)
    574 		return;
    575 	if (raidID >= numraid || !raidPtrs[raidID]) {
    576 		bp->b_error = ENODEV;
    577 		bp->b_flags |= B_ERROR;
    578 		bp->b_resid = bp->b_bcount;
    579 		biodone(bp);
    580 		return;
    581 	}
    582 	raidPtr = raidPtrs[raidID];
    583 	if (!raidPtr->valid) {
    584 		bp->b_error = ENODEV;
    585 		bp->b_flags |= B_ERROR;
    586 		bp->b_resid = bp->b_bcount;
    587 		biodone(bp);
    588 		return;
    589 	}
    590 	if (bp->b_bcount == 0) {
    591 		db1_printf(("b_bcount is zero..\n"));
    592 		biodone(bp);
    593 		return;
    594 	}
    595 	lp = rs->sc_dkdev.dk_label;
    596 
    597 	/*
    598 	 * Do bounds checking and adjust transfer.  If there's an
    599 	 * error, the bounds check will flag that for us.
    600 	 */
    601 
    602 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    603 	if (DISKPART(bp->b_dev) != RAW_PART)
    604 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    605 			db1_printf(("Bounds check failed!!:%d %d\n",
    606 				(int) bp->b_blkno, (int) wlabel));
    607 			biodone(bp);
    608 			return;
    609 		}
    610 	s = splbio();		/* XXX Needed? */
    611 	db1_printf(("Beginning strategy...\n"));
    612 
    613 	bp->b_resid = 0;
    614 	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
    615 	    NULL, NULL, NULL);
    616 	if (bp->b_error) {
    617 		bp->b_flags |= B_ERROR;
    618 		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
    619 			bp->b_error));
    620 	}
    621 	splx(s);
    622 #if 0
    623 	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
    624 		bp, bp->b_data,
    625 		(int) bp->b_bcount, (int) bp->b_resid));
    626 #endif
    627 }
    628 /* ARGSUSED */
    629 int
    630 raidread(dev, uio, flags)
    631 	dev_t   dev;
    632 	struct uio *uio;
    633 	int     flags;
    634 {
    635 	int     unit = raidunit(dev);
    636 	struct raid_softc *rs;
    637 	int     part;
    638 
    639 	if (unit >= numraid)
    640 		return (ENXIO);
    641 	rs = &raid_softc[unit];
    642 
    643 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    644 		return (ENXIO);
    645 	part = DISKPART(dev);
    646 
    647 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    648 
    649 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    650 
    651 }
    652 /* ARGSUSED */
    653 int
    654 raidwrite(dev, uio, flags)
    655 	dev_t   dev;
    656 	struct uio *uio;
    657 	int     flags;
    658 {
    659 	int     unit = raidunit(dev);
    660 	struct raid_softc *rs;
    661 
    662 	if (unit >= numraid)
    663 		return (ENXIO);
    664 	rs = &raid_softc[unit];
    665 
    666 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    667 		return (ENXIO);
    668 	db1_printf(("raidwrite\n"));
    669 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    670 
    671 }
    672 
    673 int
    674 raidioctl(dev, cmd, data, flag, p)
    675 	dev_t   dev;
    676 	u_long  cmd;
    677 	caddr_t data;
    678 	int     flag;
    679 	struct proc *p;
    680 {
    681 	int     unit = raidunit(dev);
    682 	int     error = 0;
    683 	int     part, pmask;
    684 	struct raid_softc *rs;
    685 	RF_Config_t *k_cfg, *u_cfg;
    686 	u_char *specific_buf;
    687 	int retcode = 0;
    688 	int row;
    689 	int column;
    690 	int s;
    691 	struct rf_recon_req *rrcopy, *rr;
    692 	RF_ComponentLabel_t *component_label;
    693 	RF_ComponentLabel_t ci_label;
    694 	RF_ComponentLabel_t **c_label_ptr;
    695 	RF_SingleComponent_t *sparePtr,*componentPtr;
    696 	RF_SingleComponent_t hot_spare;
    697 	RF_SingleComponent_t component;
    698 
    699 	if (unit >= numraid)
    700 		return (ENXIO);
    701 	rs = &raid_softc[unit];
    702 
    703 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    704 		(int) DISKPART(dev), (int) unit, (int) cmd));
    705 
    706 	/* Must be open for writes for these commands... */
    707 	switch (cmd) {
    708 	case DIOCSDINFO:
    709 	case DIOCWDINFO:
    710 	case DIOCWLABEL:
    711 		if ((flag & FWRITE) == 0)
    712 			return (EBADF);
    713 	}
    714 
    715 	/* Must be initialized for these... */
    716 	switch (cmd) {
    717 	case DIOCGDINFO:
    718 	case DIOCSDINFO:
    719 	case DIOCWDINFO:
    720 	case DIOCGPART:
    721 	case DIOCWLABEL:
    722 	case DIOCGDEFLABEL:
    723 	case RAIDFRAME_SHUTDOWN:
    724 	case RAIDFRAME_REWRITEPARITY:
    725 	case RAIDFRAME_GET_INFO:
    726 	case RAIDFRAME_RESET_ACCTOTALS:
    727 	case RAIDFRAME_GET_ACCTOTALS:
    728 	case RAIDFRAME_KEEP_ACCTOTALS:
    729 	case RAIDFRAME_GET_SIZE:
    730 	case RAIDFRAME_FAIL_DISK:
    731 	case RAIDFRAME_COPYBACK:
    732 	case RAIDFRAME_CHECKRECON:
    733 	case RAIDFRAME_GET_COMPONENT_LABEL:
    734 	case RAIDFRAME_SET_COMPONENT_LABEL:
    735 	case RAIDFRAME_ADD_HOT_SPARE:
    736 	case RAIDFRAME_REMOVE_HOT_SPARE:
    737 	case RAIDFRAME_INIT_LABELS:
    738 	case RAIDFRAME_REBUILD_IN_PLACE:
    739 	case RAIDFRAME_CHECK_PARITY:
    740 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    741 			return (ENXIO);
    742 	}
    743 
    744 	switch (cmd) {
    745 
    746 
    747 		/* configure the system */
    748 	case RAIDFRAME_CONFIGURE:
    749 
    750 		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
    751 		/* copy-in the configuration information */
    752 		/* data points to a pointer to the configuration structure */
    753 		u_cfg = *((RF_Config_t **) data);
    754 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    755 		if (k_cfg == NULL) {
    756 			db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
    757 			return (ENOMEM);
    758 		}
    759 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    760 		    sizeof(RF_Config_t));
    761 		if (retcode) {
    762 			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
    763 				retcode));
    764 			return (retcode);
    765 		}
    766 		/* allocate a buffer for the layout-specific data, and copy it
    767 		 * in */
    768 		if (k_cfg->layoutSpecificSize) {
    769 			if (k_cfg->layoutSpecificSize > 10000) {
    770 				/* sanity check */
    771 				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
    772 				return (EINVAL);
    773 			}
    774 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    775 			    (u_char *));
    776 			if (specific_buf == NULL) {
    777 				RF_Free(k_cfg, sizeof(RF_Config_t));
    778 				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
    779 				return (ENOMEM);
    780 			}
    781 			retcode = copyin(k_cfg->layoutSpecific,
    782 			    (caddr_t) specific_buf,
    783 			    k_cfg->layoutSpecificSize);
    784 			if (retcode) {
    785 				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
    786 					retcode));
    787 				return (retcode);
    788 			}
    789 		} else
    790 			specific_buf = NULL;
    791 		k_cfg->layoutSpecific = specific_buf;
    792 
    793 		/* should do some kind of sanity check on the configuration.
    794 		 * Store the sum of all the bytes in the last byte? */
    795 
    796 		/* configure the system */
    797 
    798 		raidPtrs[unit]->raidid = unit;
    799 
    800 		retcode = rf_Configure(raidPtrs[unit], k_cfg);
    801 
    802 		/* allow this many simultaneous IO's to this RAID device */
    803 		raidPtrs[unit]->openings = RAIDOUTSTANDING;
    804 
    805 		if (retcode == 0) {
    806 			retcode = raidinit(dev, raidPtrs[unit], unit);
    807 			rf_markalldirty( raidPtrs[unit] );
    808 		}
    809 		/* free the buffers.  No return code here. */
    810 		if (k_cfg->layoutSpecificSize) {
    811 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    812 		}
    813 		RF_Free(k_cfg, sizeof(RF_Config_t));
    814 
    815 		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
    816 			retcode));
    817 
    818 		return (retcode);
    819 
    820 		/* shutdown the system */
    821 	case RAIDFRAME_SHUTDOWN:
    822 
    823 		if ((error = raidlock(rs)) != 0)
    824 			return (error);
    825 
    826 		/*
    827 		 * If somebody has a partition mounted, we shouldn't
    828 		 * shutdown.
    829 		 */
    830 
    831 		part = DISKPART(dev);
    832 		pmask = (1 << part);
    833 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    834 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    835 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    836 			raidunlock(rs);
    837 			return (EBUSY);
    838 		}
    839 
    840 		if (rf_debugKernelAccess) {
    841 			printf("call shutdown\n");
    842 		}
    843 
    844 		retcode = rf_Shutdown(raidPtrs[unit]);
    845 
    846 		db1_printf(("Done main shutdown\n"));
    847 
    848 		pool_destroy(&rs->sc_cbufpool);
    849 		db1_printf(("Done freeing component buffer freelist\n"));
    850 
    851 		/* It's no longer initialized... */
    852 		rs->sc_flags &= ~RAIDF_INITED;
    853 
    854 		/* Detach the disk. */
    855 		disk_detach(&rs->sc_dkdev);
    856 
    857 		raidunlock(rs);
    858 
    859 		return (retcode);
    860 	case RAIDFRAME_GET_COMPONENT_LABEL:
    861 		c_label_ptr = (RF_ComponentLabel_t **) data;
    862 		/* need to read the component label for the disk indicated
    863 		   by row,column in component_label
    864 		   XXX need to sanity check these values!!!
    865 		   */
    866 
    867 		/* For practice, let's get it directly fromdisk, rather
    868 		   than from the in-core copy */
    869 		RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
    870 			   (RF_ComponentLabel_t *));
    871 		if (component_label == NULL)
    872 			return (ENOMEM);
    873 
    874 		bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
    875 
    876 		retcode = copyin( *c_label_ptr, component_label,
    877 				  sizeof(RF_ComponentLabel_t));
    878 
    879 		if (retcode) {
    880 			return(retcode);
    881 		}
    882 
    883 		row = component_label->row;
    884 		column = component_label->column;
    885 
    886 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    887 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    888 			return(EINVAL);
    889 		}
    890 
    891 		raidread_component_label(
    892                               raidPtrs[unit]->Disks[row][column].dev,
    893 			      raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    894 			      component_label );
    895 
    896 		retcode = copyout((caddr_t) component_label,
    897 				  (caddr_t) *c_label_ptr,
    898 				  sizeof(RF_ComponentLabel_t));
    899 		RF_Free( component_label, sizeof(RF_ComponentLabel_t));
    900 		return (retcode);
    901 
    902 	case RAIDFRAME_SET_COMPONENT_LABEL:
    903 		component_label = (RF_ComponentLabel_t *) data;
    904 
    905 		/* XXX check the label for valid stuff... */
    906 		/* Note that some things *should not* get modified --
    907 		   the user should be re-initing the labels instead of
    908 		   trying to patch things.
    909 		   */
    910 
    911 		printf("Got component label:\n");
    912 		printf("Version: %d\n",component_label->version);
    913 		printf("Serial Number: %d\n",component_label->serial_number);
    914 		printf("Mod counter: %d\n",component_label->mod_counter);
    915 		printf("Row: %d\n", component_label->row);
    916 		printf("Column: %d\n", component_label->column);
    917 		printf("Num Rows: %d\n", component_label->num_rows);
    918 		printf("Num Columns: %d\n", component_label->num_columns);
    919 		printf("Clean: %d\n", component_label->clean);
    920 		printf("Status: %d\n", component_label->status);
    921 
    922 		row = component_label->row;
    923 		column = component_label->column;
    924 
    925 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
    926 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
    927 			return(EINVAL);
    928 		}
    929 
    930 		/* XXX this isn't allowed to do anything for now :-) */
    931 #if 0
    932 		raidwrite_component_label(
    933                             raidPtrs[unit]->Disks[row][column].dev,
    934 			    raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    935 			    component_label );
    936 #endif
    937 		return (0);
    938 
    939 	case RAIDFRAME_INIT_LABELS:
    940 		component_label = (RF_ComponentLabel_t *) data;
    941 		/*
    942 		   we only want the serial number from
    943 		   the above.  We get all the rest of the information
    944 		   from the config that was used to create this RAID
    945 		   set.
    946 		   */
    947 
    948 		raidPtrs[unit]->serial_number = component_label->serial_number;
    949 		/* current version number */
    950 		ci_label.version = RF_COMPONENT_LABEL_VERSION;
    951 		ci_label.serial_number = component_label->serial_number;
    952 		ci_label.mod_counter = raidPtrs[unit]->mod_counter;
    953 		ci_label.num_rows = raidPtrs[unit]->numRow;
    954 		ci_label.num_columns = raidPtrs[unit]->numCol;
    955 		ci_label.clean = RF_RAID_DIRTY; /* not clean */
    956 		ci_label.status = rf_ds_optimal; /* "It's good!" */
    957 
    958 		for(row=0;row<raidPtrs[unit]->numRow;row++) {
    959 			ci_label.row = row;
    960 			for(column=0;column<raidPtrs[unit]->numCol;column++) {
    961 				ci_label.column = column;
    962 				raidwrite_component_label(
    963 				  raidPtrs[unit]->Disks[row][column].dev,
    964 				  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
    965 				  &ci_label );
    966 			}
    967 		}
    968 
    969 		return (retcode);
    970 
    971 		/* initialize all parity */
    972 	case RAIDFRAME_REWRITEPARITY:
    973 
    974 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
    975 			/* Parity for RAID 0 is trivially correct */
    976 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    977 			return(0);
    978 		}
    979 
    980 		/* borrow the thread of the requesting process */
    981 
    982 		s = splbio();
    983 		retcode = rf_RewriteParity(raidPtrs[unit]);
    984 		splx(s);
    985 		/* return I/O Error if the parity rewrite fails */
    986 
    987 		if (retcode) {
    988 			retcode = EIO;
    989 		} else {
    990 			/* set the clean bit!  If we shutdown correctly,
    991 			 the clean bit on each component label will get
    992 			 set */
    993 			raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
    994 		}
    995 		return (retcode);
    996 
    997 
    998 	case RAIDFRAME_ADD_HOT_SPARE:
    999 		sparePtr = (RF_SingleComponent_t *) data;
   1000 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1001 		printf("Adding spare\n");
   1002 		retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
   1003 		return(retcode);
   1004 
   1005 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1006 		return(retcode);
   1007 
   1008 	case RAIDFRAME_REBUILD_IN_PLACE:
   1009 
   1010 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1011 			/* Can't do this on a RAID 0!! */
   1012 			return(EINVAL);
   1013 		}
   1014 
   1015 		componentPtr = (RF_SingleComponent_t *) data;
   1016 		memcpy( &component, componentPtr,
   1017 			sizeof(RF_SingleComponent_t));
   1018 		row = component.row;
   1019 		column = component.column;
   1020 		printf("Rebuild: %d %d\n",row, column);
   1021 		if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
   1022 		    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
   1023 			return(EINVAL);
   1024 		}
   1025 		printf("Attempting a rebuild in place\n");
   1026 		s = splbio();
   1027 		retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
   1028 		splx(s);
   1029 		return(retcode);
   1030 
   1031 	case RAIDFRAME_GET_INFO:
   1032 		{
   1033 			RF_Raid_t *raid = raidPtrs[unit];
   1034 			RF_DeviceConfig_t *cfg, **ucfgp;
   1035 			int     i, j, d;
   1036 
   1037 			if (!raid->valid)
   1038 				return (ENODEV);
   1039 			ucfgp = (RF_DeviceConfig_t **) data;
   1040 			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
   1041 				  (RF_DeviceConfig_t *));
   1042 			if (cfg == NULL)
   1043 				return (ENOMEM);
   1044 			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
   1045 			cfg->rows = raid->numRow;
   1046 			cfg->cols = raid->numCol;
   1047 			cfg->ndevs = raid->numRow * raid->numCol;
   1048 			if (cfg->ndevs >= RF_MAX_DISKS) {
   1049 				cfg->ndevs = 0;
   1050 				return (ENOMEM);
   1051 			}
   1052 			cfg->nspares = raid->numSpare;
   1053 			if (cfg->nspares >= RF_MAX_DISKS) {
   1054 				cfg->nspares = 0;
   1055 				return (ENOMEM);
   1056 			}
   1057 			cfg->maxqdepth = raid->maxQueueDepth;
   1058 			d = 0;
   1059 			for (i = 0; i < cfg->rows; i++) {
   1060 				for (j = 0; j < cfg->cols; j++) {
   1061 					cfg->devs[d] = raid->Disks[i][j];
   1062 					d++;
   1063 				}
   1064 			}
   1065 			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
   1066 				cfg->spares[i] = raid->Disks[0][j];
   1067 			}
   1068 			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
   1069 					  sizeof(RF_DeviceConfig_t));
   1070 			RF_Free(cfg, sizeof(RF_DeviceConfig_t));
   1071 
   1072 			return (retcode);
   1073 		}
   1074 		break;
   1075 	case RAIDFRAME_CHECK_PARITY:
   1076 		*(int *) data = raidPtrs[unit]->parity_good;
   1077 		return (0);
   1078 	case RAIDFRAME_RESET_ACCTOTALS:
   1079 		{
   1080 			RF_Raid_t *raid = raidPtrs[unit];
   1081 
   1082 			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
   1083 			return (0);
   1084 		}
   1085 		break;
   1086 
   1087 	case RAIDFRAME_GET_ACCTOTALS:
   1088 		{
   1089 			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
   1090 			RF_Raid_t *raid = raidPtrs[unit];
   1091 
   1092 			*totals = raid->acc_totals;
   1093 			return (0);
   1094 		}
   1095 		break;
   1096 
   1097 	case RAIDFRAME_KEEP_ACCTOTALS:
   1098 		{
   1099 			RF_Raid_t *raid = raidPtrs[unit];
   1100 			int    *keep = (int *) data;
   1101 
   1102 			raid->keep_acc_totals = *keep;
   1103 			return (0);
   1104 		}
   1105 		break;
   1106 
   1107 	case RAIDFRAME_GET_SIZE:
   1108 		*(int *) data = raidPtrs[unit]->totalSectors;
   1109 		return (0);
   1110 
   1111 #define RAIDFRAME_RECON 1
   1112 		/* XXX The above should probably be set somewhere else!! GO */
   1113 #if RAIDFRAME_RECON > 0
   1114 
   1115 		/* fail a disk & optionally start reconstruction */
   1116 	case RAIDFRAME_FAIL_DISK:
   1117 
   1118 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1119 			/* Can't do this on a RAID 0!! */
   1120 			return(EINVAL);
   1121 		}
   1122 
   1123 		rr = (struct rf_recon_req *) data;
   1124 
   1125 		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
   1126 		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
   1127 			return (EINVAL);
   1128 
   1129 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1130 		       unit, rr->row, rr->col);
   1131 
   1132 		/* make a copy of the recon request so that we don't rely on
   1133 		 * the user's buffer */
   1134 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1135 		bcopy(rr, rrcopy, sizeof(*rr));
   1136 		rrcopy->raidPtr = (void *) raidPtrs[unit];
   1137 
   1138 		LOCK_RECON_Q_MUTEX();
   1139 		rrcopy->next = recon_queue;
   1140 		recon_queue = rrcopy;
   1141 		wakeup(&recon_queue);
   1142 		UNLOCK_RECON_Q_MUTEX();
   1143 
   1144 		return (0);
   1145 
   1146 		/* invoke a copyback operation after recon on whatever disk
   1147 		 * needs it, if any */
   1148 	case RAIDFRAME_COPYBACK:
   1149 
   1150 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1151 			/* This makes no sense on a RAID 0!! */
   1152 			return(EINVAL);
   1153 		}
   1154 
   1155 		/* borrow the current thread to get this done */
   1156 
   1157 		s = splbio();
   1158 		rf_CopybackReconstructedData(raidPtrs[unit]);
   1159 		splx(s);
   1160 		return (0);
   1161 
   1162 		/* return the percentage completion of reconstruction */
   1163 	case RAIDFRAME_CHECKRECON:
   1164 		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
   1165 			/* This makes no sense on a RAID 0 */
   1166 			return(EINVAL);
   1167 		}
   1168 
   1169 		row = *(int *) data;
   1170 		if (row < 0 || row >= raidPtrs[unit]->numRow)
   1171 			return (EINVAL);
   1172 		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
   1173 			*(int *) data = 100;
   1174 		else
   1175 			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
   1176 		return (0);
   1177 
   1178 		/* the sparetable daemon calls this to wait for the kernel to
   1179 		 * need a spare table. this ioctl does not return until a
   1180 		 * spare table is needed. XXX -- calling mpsleep here in the
   1181 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1182 		 * -- I should either compute the spare table in the kernel,
   1183 		 * or have a different -- XXX XXX -- interface (a different
   1184 		 * character device) for delivering the table          -- XXX */
   1185 #if 0
   1186 	case RAIDFRAME_SPARET_WAIT:
   1187 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1188 		while (!rf_sparet_wait_queue)
   1189 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1190 		waitreq = rf_sparet_wait_queue;
   1191 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1192 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1193 
   1194 		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */
   1195 
   1196 		RF_Free(waitreq, sizeof(*waitreq));
   1197 		return (0);
   1198 
   1199 
   1200 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1201 		 * code in it that will cause the dameon to exit */
   1202 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1203 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1204 		waitreq->fcol = -1;
   1205 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1206 		waitreq->next = rf_sparet_wait_queue;
   1207 		rf_sparet_wait_queue = waitreq;
   1208 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1209 		wakeup(&rf_sparet_wait_queue);
   1210 		return (0);
   1211 
   1212 		/* used by the spare table daemon to deliver a spare table
   1213 		 * into the kernel */
   1214 	case RAIDFRAME_SEND_SPARET:
   1215 
   1216 		/* install the spare table */
   1217 		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
   1218 
   1219 		/* respond to the requestor.  the return status of the spare
   1220 		 * table installation is passed in the "fcol" field */
   1221 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1222 		waitreq->fcol = retcode;
   1223 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1224 		waitreq->next = rf_sparet_resp_queue;
   1225 		rf_sparet_resp_queue = waitreq;
   1226 		wakeup(&rf_sparet_resp_queue);
   1227 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1228 
   1229 		return (retcode);
   1230 #endif
   1231 
   1232 
   1233 #endif				/* RAIDFRAME_RECON > 0 */
   1234 
   1235 	default:
   1236 		break;		/* fall through to the os-specific code below */
   1237 
   1238 	}
   1239 
   1240 	if (!raidPtrs[unit]->valid)
   1241 		return (EINVAL);
   1242 
   1243 	/*
   1244 	 * Add support for "regular" device ioctls here.
   1245 	 */
   1246 
   1247 	switch (cmd) {
   1248 	case DIOCGDINFO:
   1249 		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
   1250 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1251 		break;
   1252 
   1253 	case DIOCGPART:
   1254 		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
   1255 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1256 		((struct partinfo *) data)->part =
   1257 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1258 		break;
   1259 
   1260 	case DIOCWDINFO:
   1261 		db1_printf(("DIOCWDINFO\n"));
   1262 	case DIOCSDINFO:
   1263 		db1_printf(("DIOCSDINFO\n"));
   1264 		if ((error = raidlock(rs)) != 0)
   1265 			return (error);
   1266 
   1267 		rs->sc_flags |= RAIDF_LABELLING;
   1268 
   1269 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1270 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1271 		if (error == 0) {
   1272 			if (cmd == DIOCWDINFO)
   1273 				error = writedisklabel(RAIDLABELDEV(dev),
   1274 				    raidstrategy, rs->sc_dkdev.dk_label,
   1275 				    rs->sc_dkdev.dk_cpulabel);
   1276 		}
   1277 		rs->sc_flags &= ~RAIDF_LABELLING;
   1278 
   1279 		raidunlock(rs);
   1280 
   1281 		if (error)
   1282 			return (error);
   1283 		break;
   1284 
   1285 	case DIOCWLABEL:
   1286 		db1_printf(("DIOCWLABEL\n"));
   1287 		if (*(int *) data != 0)
   1288 			rs->sc_flags |= RAIDF_WLABEL;
   1289 		else
   1290 			rs->sc_flags &= ~RAIDF_WLABEL;
   1291 		break;
   1292 
   1293 	case DIOCGDEFLABEL:
   1294 		db1_printf(("DIOCGDEFLABEL\n"));
   1295 		raidgetdefaultlabel(raidPtrs[unit], rs,
   1296 		    (struct disklabel *) data);
   1297 		break;
   1298 
   1299 	default:
   1300 		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
   1301 	}
   1302 	return (retcode);
   1303 
   1304 }
   1305 
   1306 
   1307 /* raidinit -- complete the rest of the initialization for the
   1308    RAIDframe device.  */
   1309 
   1310 
   1311 static int
   1312 raidinit(dev, raidPtr, unit)
   1313 	dev_t   dev;
   1314 	RF_Raid_t *raidPtr;
   1315 	int     unit;
   1316 {
   1317 	int     retcode;
   1318 	/* int ix; */
   1319 	/* struct raidbuf *raidbp; */
   1320 	struct raid_softc *rs;
   1321 
   1322 	retcode = 0;
   1323 
   1324 	rs = &raid_softc[unit];
   1325 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1326 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1327 
   1328 
   1329 	/* XXX should check return code first... */
   1330 	rs->sc_flags |= RAIDF_INITED;
   1331 
   1332 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1333 
   1334 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1335 
   1336 	/* disk_attach actually creates space for the CPU disklabel, among
   1337 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1338 	 * with disklabels. */
   1339 
   1340 	disk_attach(&rs->sc_dkdev);
   1341 
   1342 	/* XXX There may be a weird interaction here between this, and
   1343 	 * protectedSectors, as used in RAIDframe.  */
   1344 
   1345 	rs->sc_size = raidPtr->totalSectors;
   1346 	rs->sc_dev = dev;
   1347 
   1348 	return (retcode);
   1349 }
   1350 
   1351 /*
   1352  * This kernel thread never exits.  It is created once, and persists
   1353  * until the system reboots.
   1354  */
   1355 
/*
 * Reconstruction worker: loop forever, pulling rf_recon_req entries off
 * the global recon_queue and handing each to rf_FailDisk().  Requests
 * are queued (and this thread woken) by the RAIDFRAME_FAIL_DISK ioctl.
 */
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;	/* request popped from recon_queue */
	int     s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* drop the lock before sleeping; retake it before
			 * re-testing the queue head */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* pop the head of the singly-linked request list */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
	         * If flags specifies that we should start recon, this call
	         * will not return until reconstruction completes, fails,
		 * or is aborted.
	         */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* request was allocated by the ioctl path; we own and free it */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
/*
 * Queue *req on rf_sparet_wait_queue for the user-level sparetable
 * daemon, wake the daemon, then sleep until a response appears on
 * rf_sparet_resp_queue.  Returns the fcol field of the response
 * (the daemon's status code).
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): stale comment fixed -- this used to say "mpsleep
	 * unlocks the mutex", but tsleep() does NOT release
	 * rf_sparet_wait_mutex; we appear to sleep holding it.  Confirm
	 * whether RF_LOCK_MUTEX can deadlock against the response path. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* pop the response; note this reuses 'req' for a different object */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1422 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1423  * bp & passes it down.
   1424  * any calls originating in the kernel must use non-blocking I/O
   1425  * do some extra sanity checking to return "appropriate" error values for
   1426  * certain conditions (to make some standard utilities work)
   1427  */
   1428 int
   1429 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
   1430 	RF_Raid_t *raidPtr;
   1431 	struct buf *bp;
   1432 	RF_RaidAccessFlags_t flags;
   1433 	void    (*cbFunc) (struct buf *);
   1434 	void   *cbArg;
   1435 {
   1436 	RF_SectorCount_t num_blocks, pb, sum;
   1437 	RF_RaidAddr_t raid_addr;
   1438 	int     retcode;
   1439 	struct partition *pp;
   1440 	daddr_t blocknum;
   1441 	int     unit;
   1442 	struct raid_softc *rs;
   1443 	int     do_async;
   1444 
   1445 	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */
   1446 
   1447 	unit = raidPtr->raidid;
   1448 	rs = &raid_softc[unit];
   1449 
   1450 	/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1451 	 * partition.. Need to make it absolute to the underlying device.. */
   1452 
   1453 	blocknum = bp->b_blkno;
   1454 	if (DISKPART(bp->b_dev) != RAW_PART) {
   1455 		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1456 		blocknum += pp->p_offset;
   1457 		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
   1458 			pp->p_offset));
   1459 	} else {
   1460 		db1_printf(("Is raw..\n"));
   1461 	}
   1462 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
   1463 
   1464 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1465 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1466 
   1467 	/* *THIS* is where we adjust what block we're going to... but DO NOT
   1468 	 * TOUCH bp->b_blkno!!! */
   1469 	raid_addr = blocknum;
   1470 
   1471 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1472 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1473 	sum = raid_addr + num_blocks + pb;
   1474 	if (1 || rf_debugKernelAccess) {
   1475 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1476 			(int) raid_addr, (int) sum, (int) num_blocks,
   1477 			(int) pb, (int) bp->b_resid));
   1478 	}
   1479 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1480 	    || (sum < num_blocks) || (sum < pb)) {
   1481 		bp->b_error = ENOSPC;
   1482 		bp->b_flags |= B_ERROR;
   1483 		bp->b_resid = bp->b_bcount;
   1484 		biodone(bp);
   1485 		return (bp->b_error);
   1486 	}
   1487 	/*
   1488 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1489 	 */
   1490 
   1491 	if (bp->b_bcount & raidPtr->sectorMask) {
   1492 		bp->b_error = EINVAL;
   1493 		bp->b_flags |= B_ERROR;
   1494 		bp->b_resid = bp->b_bcount;
   1495 		biodone(bp);
   1496 		return (bp->b_error);
   1497 	}
   1498 	db1_printf(("Calling DoAccess..\n"));
   1499 
   1500 
   1501 	/* Put a throttle on the number of requests we handle simultanously */
   1502 
   1503 	RF_LOCK_MUTEX(raidPtr->mutex);
   1504 
   1505 	while(raidPtr->openings <= 0) {
   1506 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1507 		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
   1508 		RF_LOCK_MUTEX(raidPtr->mutex);
   1509 	}
   1510 	raidPtr->openings--;
   1511 
   1512 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1513 
   1514 	/*
   1515 	 * Everything is async.
   1516 	 */
   1517 	do_async = 1;
   1518 
   1519 	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
   1520 	 * B_READ instead */
   1521 	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1522 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1523 	    do_async, raid_addr, num_blocks,
   1524 	    bp->b_un.b_addr,
   1525 	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
   1526 	    NULL, cbFunc, cbArg);
   1527 #if 0
   1528 	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
   1529 		bp->b_data, (int) bp->b_resid));
   1530 #endif
   1531 
   1532 	return (retcode);
   1533 }
   1534 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1535 
/*
 * Issue one RF_DiskQueueData_t request to a component: wrap the
 * caller's buf in a raidbuf, initialize it with InitBP(), and push it
 * down with VOP_STRATEGY().  NOP requests just bump numOutstanding and
 * invoke the completion callback directly.  Always returns 0.
 * Disk queue should be locked upon entry (per the comment above).
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request came from, for KernelWakeupFunc */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* clear any stale error state left in the incoming buf */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): double parens make this print the string
		 * expression via printf -- presumably db1_printf was meant;
		 * harmless but worth confirming */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		/* complete immediately; no physical I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* fill in raidbp->rf_buf for the component I/O; completion
		 * comes back through KernelWakeupFunc */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must account for themselves on the vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
   1648 /* this is the callback function associated with a I/O invoked from
   1649    kernel code.
   1650  */
/*
 * Biodone-style completion callback for component I/O issued by
 * rf_DispatchKernelIO().  vbp is really the raidbuf allocated there;
 * propagate error/resid/bcount back to the original buf, update trace
 * accounting, mark a failing component dead (once), release the
 * raidbuf, and notify RAIDframe via rf_DiskIOComplete()/CompleteFunc.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is the first member of struct raidbuf, so this cast recovers
	 * the wrapper we allocated in rf_DispatchKernelIO -- TODO confirm
	 * layout assumption against the raidbuf definition */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buf this component I/O was issued on behalf of */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-level error to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* charge the elapsed time to the access trace record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf wrapper to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell RAIDframe this component I/O is done (error flag = 0/1) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
   1749 
   1750 
   1751 
   1752 /*
   1753  * initialize a buf structure for doing an I/O in the kernel.
   1754  */
/*
 * Fill in a buf for a component I/O issued from the kernel.
 *
 * bp                - the buf to initialize (caller-owned)
 * b_vp              - vnode of the component device
 * rw_flag           - B_READ or B_WRITE (ORed with B_CALL below)
 * dev               - component dev_t
 * startSect/numSect - transfer start and length, in sectors
 * buf               - data buffer for the transfer
 * cbFunc            - b_iodone callback (KernelWakeupFunc)
 * cbArg             - NOTE(review): currently unused; the callback
 *                     recovers its context from the enclosing raidbuf
 * logBytesPerSector - log2 of the sector size, for byte conversion
 * b_proc            - process on whose behalf the I/O is done
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* bp->b_flags       = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	/* convert sector count to bytes */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	/* a zero-length transfer indicates a caller bug upstream */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
   1792 
   1793 static void
   1794 raidgetdefaultlabel(raidPtr, rs, lp)
   1795 	RF_Raid_t *raidPtr;
   1796 	struct raid_softc *rs;
   1797 	struct disklabel *lp;
   1798 {
   1799 	db1_printf(("Building a default label...\n"));
   1800 	bzero(lp, sizeof(*lp));
   1801 
   1802 	/* fabricate a label... */
   1803 	lp->d_secperunit = raidPtr->totalSectors;
   1804 	lp->d_secsize = raidPtr->bytesPerSector;
   1805 	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
   1806 	lp->d_ntracks = 1;
   1807 	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
   1808 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1809 
   1810 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1811 	lp->d_type = DTYPE_RAID;
   1812 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1813 	lp->d_rpm = 3600;
   1814 	lp->d_interleave = 1;
   1815 	lp->d_flags = 0;
   1816 
   1817 	lp->d_partitions[RAW_PART].p_offset = 0;
   1818 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1819 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1820 	lp->d_npartitions = RAW_PART + 1;
   1821 
   1822 	lp->d_magic = DISKMAGIC;
   1823 	lp->d_magic2 = DISKMAGIC;
   1824 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1825 
   1826 }
   1827 /*
   1828  * Read the disklabel from the raid device.  If one is not present, fake one
   1829  * up.
   1830  */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default; readdisklabel() below will
	   overwrite it if a real label is found on the device. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No usable on-disk label; fix up the default one. */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn about (but do not reject) partitions that run
		   past the end of the array. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
   1884 /*
   1885  * Take care of things one might want to take care of in the event
   1886  * that a disklabel isn't present.
   1887  */
   1888 static void
   1889 raidmakedisklabel(rs)
   1890 	struct raid_softc *rs;
   1891 {
   1892 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1893 	db1_printf(("Making a label..\n"));
   1894 
   1895 	/*
   1896 	 * For historical reasons, if there's no disklabel present
   1897 	 * the raw partition must be marked FS_BSDFFS.
   1898 	 */
   1899 
   1900 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1901 
   1902 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1903 
   1904 	lp->d_checksum = dkcksum(lp);
   1905 }
   1906 /*
   1907  * Lookup the provided name in the filesystem.  If the file exists,
   1908  * is a valid block device, and isn't being used by anyone else,
   1909  * set *vpp to the file's vnode.
   1910  * You'll find the original of this in ccd.c
   1911  */
   1912 int
   1913 raidlookup(path, p, vpp)
   1914 	char   *path;
   1915 	struct proc *p;
   1916 	struct vnode **vpp;	/* result */
   1917 {
   1918 	struct nameidata nd;
   1919 	struct vnode *vp;
   1920 	struct vattr va;
   1921 	int     error;
   1922 
   1923 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1924 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1925 #ifdef DEBUG
   1926 		printf("RAIDframe: vn_open returned %d\n", error);
   1927 #endif
   1928 		return (error);
   1929 	}
   1930 	vp = nd.ni_vp;
   1931 	if (vp->v_usecount > 1) {
   1932 		VOP_UNLOCK(vp, 0);
   1933 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1934 		return (EBUSY);
   1935 	}
   1936 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1937 		VOP_UNLOCK(vp, 0);
   1938 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1939 		return (error);
   1940 	}
   1941 	/* XXX: eventually we should handle VREG, too. */
   1942 	if (va.va_type != VBLK) {
   1943 		VOP_UNLOCK(vp, 0);
   1944 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1945 		return (ENOTBLK);
   1946 	}
   1947 	VOP_UNLOCK(vp, 0);
   1948 	*vpp = vp;
   1949 	return (0);
   1950 }
   1951 /*
   1952  * Wait interruptibly for an exclusive lock.
   1953  *
   1954  * XXX
   1955  * Several drivers do this; it should be abstracted and made MP-safe.
   1956  * (Hmm... where have we seen this warning before :->  GO )
   1957  */
   1958 static int
   1959 raidlock(rs)
   1960 	struct raid_softc *rs;
   1961 {
   1962 	int     error;
   1963 
   1964 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   1965 		rs->sc_flags |= RAIDF_WANTED;
   1966 		if ((error =
   1967 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   1968 			return (error);
   1969 	}
   1970 	rs->sc_flags |= RAIDF_LOCKED;
   1971 	return (0);
   1972 }
   1973 /*
   1974  * Unlock and wake up any waiters.
   1975  */
   1976 static void
   1977 raidunlock(rs)
   1978 	struct raid_softc *rs;
   1979 {
   1980 
   1981 	rs->sc_flags &= ~RAIDF_LOCKED;
   1982 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   1983 		rs->sc_flags &= ~RAIDF_WANTED;
   1984 		wakeup(rs);
   1985 	}
   1986 }
   1987 
   1988 
   1989 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   1990 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   1991 
   1992 int
   1993 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   1994 {
   1995 	RF_ComponentLabel_t component_label;
   1996 	raidread_component_label(dev, b_vp, &component_label);
   1997 	component_label.mod_counter = mod_counter;
   1998 	component_label.clean = RF_RAID_CLEAN;
   1999 	raidwrite_component_label(dev, b_vp, &component_label);
   2000 	return(0);
   2001 }
   2002 
   2003 
   2004 int
   2005 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2006 {
   2007 	RF_ComponentLabel_t component_label;
   2008 	raidread_component_label(dev, b_vp, &component_label);
   2009 	component_label.mod_counter = mod_counter;
   2010 	component_label.clean = RF_RAID_DIRTY;
   2011 	raidwrite_component_label(dev, b_vp, &component_label);
   2012 	return(0);
   2013 }
   2014 
   2015 /* ARGSUSED */
/*
 * Read the component label stored RF_COMPONENT_INFO_OFFSET bytes into
 * component `dev' into *component_label.  Returns 0 on success, EINVAL
 * for a negative bshift, or the error from biowait().  b_vp and bsize
 * are not used here.  bshift is presumably log2 of the component's
 * block size (it feeds blocksize() and btodb()) -- confirm.
 */
int
raidread_component_label(dev, b_vp, component_label, bshift, bsize)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
	int bshift;
	int bsize;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	/* bshift is validated only after the allocation; the "out"
	   path below releases the buffer, so there is no leak. */
	if (bshift < 0) {
		error = EINVAL;
		goto out;
	}
	bp->b_dev = dev;
	bp->b_bshift = bshift;
	bp->b_bsize = blocksize(bshift);

	/* get our ducks in a row for the read */
	bp->b_blkno = btodb(RF_COMPONENT_INFO_OFFSET, bshift);
 	bp->b_resid = btodb(RF_COMPONENT_INFO_SIZE , bshift);
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;

	/* Issue the read synchronously through the block device. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

out:
	if (!error) {
		/* Copy only the label-sized prefix out of the buffer. */
		memcpy(component_label, bp->b_un.b_addr,
		       sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
        } else {
		printf("Failed to read RAID component label!\n");
	}

	/* Invalidate the buffer so the stale label data isn't cached. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
   2074 /* ARGSUSED */
/*
 * Write *component_label to the reserved label area located
 * RF_COMPONENT_INFO_OFFSET bytes into component `dev'.  Returns 0 on
 * success, EINVAL for a negative bshift, or the error from biowait().
 * b_vp and bsize are not used here.
 */
int
raidwrite_component_label(dev, b_vp, component_label, bshift, bsize)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
	int bshift;
	int bsize;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	/* bshift is validated only after the allocation; the "out"
	   path below releases the buffer, so there is no leak. */
	if (bshift < 0) {
		error = EINVAL;
		goto out;
	}
	bp->b_dev = dev;
	bp->b_bshift = bshift;
	bp->b_bsize = blocksize(bshift);

	/* get our ducks in a row for the write */
	bp->b_blkno = btodb(RF_COMPONENT_INFO_OFFSET, bshift);
 	bp->b_resid = btodb(RF_COMPONENT_INFO_SIZE, bshift);
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;

	/* Zero the whole info area, then place the label at its front. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* Issue the write synchronously through the block device. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);

out:
	/* Invalidate the buffer so the label data isn't cached. */
        bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
   2118 
/*
 * Bump the array's modification counter and mark the component label
 * of every non-failed, non-spared component dirty with that counter.
 *
 * NOTE(review): raidread_component_label() is defined with five
 * parameters but called here with three -- looks like a stale
 * interface; confirm against the current prototypes.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* Failed components are skipped entirely. */
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	/* NOTE(review): this disabled spare-handling block references
	   sparecol, i, j, srow and scol, none of which are declared in
	   this function -- it would not compile if re-enabled as-is. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
   2213 
   2214 
/*
 * Rewrite the component label of every optimal component (and every
 * in-use spare) to note current status, and mark the labels clean when
 * parity is known good (raidPtr->parity_good == RF_RAID_CLEAN).
 *
 * NOTE(review): raidread/raidwrite_component_label() are defined with
 * five parameters but called here with three -- looks like a stale
 * interface; confirm against the current prototypes.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* Sentinels; see the spare-mapping search below. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* Only components currently optimal are updated. */
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Note: spares are looked up in row 0 only. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the array position this spare stands in
			   for.  NOTE(review): if no match is found,
			   srow/scol stay at -1 (or their previous
			   values) and are written into the label. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* 	printf("Component labels updated\n"); */
}
   2331