Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.53
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.53 2000/02/22 23:13:15 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
/*
 * Verbosity of the db1_printf() debugging output.  Any value greater than
 * zero enables the messages (only in kernels built with DEBUG).
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf(a) -- conditional debugging printf.
 *
 * `a' is a complete, parenthesized printf() argument list, e.g.
 *	db1_printf(("unit %d\n", unit));
 *
 * Both variants expand to a single do { } while (0) statement, so the
 * macro behaves like an ordinary function call even when it is the body
 * of an unbraced if/else.  Without DEBUG the arguments are not evaluated.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
/*
 * Global raid device descriptors: an array of numraid pointers, one per
 * unit.  Both the array and the RF_Raid_t each entry points to are
 * allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialized in raidattach().
 * NOTE(review): presumably guards the two spare-table queues below --
 * confirm against the code that manipulates them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queues start out empty (NULL); see raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
    171 
/* prototypes */

/* File-local helpers for issuing and completing component I/O. */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
/* Called from the RAIDFRAME_CONFIGURE ioctl once rf_Configure() succeeds. */
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* Pseudo-device attach routine and the block/character device entry points. */
void raidattach __P((int));
int raidsize __P((dev_t));
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
/*
 * Pilfered from ccd.c
 *
 * Wrapper placed around each struct buf the driver issues.
 * NOTE(review): rf_buf presumably has to be the first member so that the
 * struct buf pointer seen by the I/O completion callback can be cast back
 * to a struct raidbuf pointer -- confirm in KernelWakeupFunc().
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/*
 * Take a raidbuf from / return a raidbuf to the per-unit component buffer
 * pool (sc_cbufpool in struct raid_softc).  RAIDGETBUF passes PR_NOWAIT to
 * pool_get(); per pool(9) that request may fail, so check for NULL.
 */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
   Note: Don't use sc_dev until the raidinit(0,_,_) call in
   rf_auto_config_set() actually passes in a real dev_t!  */

/*
 * Per-unit driver state.  raidattach() allocates a zeroed array of these
 * (the global raid_softc), indexed by unit number.
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	dev_t   sc_dev;	        /* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units.  Set by raidattach(); every entry point rejects
 * a unit number >= numraid.
 */
int numraid = 0;
    230 
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 *
 * The value may be overridden at build time; it is copied into
 * raidPtr->openings when a set is configured (RAIDFRAME_CONFIGURE).
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition (RAW_PART) of the unit that `dev' belongs to. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    254 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array with one entry per unit, allocated and zeroed in raidattach(). */
struct raid_softc *raid_softc;

/* Disklabel helpers. */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit lock taken around open/close processing. */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));
void rf_mountroot_hook __P((struct device *));

/*
 * raidrootdev is an array of struct device, one per unit, filled in by
 * raidattach().  rf_buildroothack() points booted_device at one of its
 * entries when exactly one rootable set was auto-configured.
 */
struct device *raidrootdev;
struct cfdata cf_raidrootdev;
struct cfdriver cfdrv;
/* XXX these should be moved up */
#include "rf_configure.h"
#include <sys/reboot.h>

/* Entry points of the long-running maintenance threads. */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
void rf_buildroothack __P((void *));

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components __P((void));
void print_component_label __P((RF_ComponentLabel_t *));
RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
static int rf_reasonable_label __P((RF_ComponentLabel_t *));
void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
				  RF_Raid_t *));
int rf_set_autoconfig __P((RF_Raid_t *, int));
int rf_set_rootpartition __P((RF_Raid_t *, int));
void rf_release_all_vps __P((RF_ConfigSet_t *));
void rf_cleanup_config_set __P((RF_ConfigSet_t *));
int rf_have_enough_components __P((RF_ConfigSet_t *));
int rf_auto_config_set __P((RF_ConfigSet_t *, int *));

/*
 * raidattach() only searches for components and schedules
 * rf_buildroothack() when this is non-zero.
 */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place */
/* XXX ugly hack. */
const char *raid_rooty = "raid0";
extern struct device *booted_device;
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    311 	RF_ConfigSet_t *config_sets;
    312 
    313 #ifdef DEBUG
    314 	printf("raidattach: Asked for %d units\n", num);
    315 #endif
    316 
    317 	if (num <= 0) {
    318 #ifdef DIAGNOSTIC
    319 		panic("raidattach: count <= 0");
    320 #endif
    321 		return;
    322 	}
    323 	/* This is where all the initialization stuff gets done. */
    324 
    325 	numraid = num;
    326 
    327 	/* Make some space for requested number of units... */
    328 
    329 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    330 	if (raidPtrs == NULL) {
    331 		panic("raidPtrs is NULL!!\n");
    332 	}
    333 
    334 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    335 	if (rc) {
    336 		RF_PANIC();
    337 	}
    338 
    339 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    340 
    341 	for (i = 0; i < numraid; i++)
    342 		raidPtrs[i] = NULL;
    343 	rc = rf_BootRaidframe();
    344 	if (rc == 0)
    345 		printf("Kernelized RAIDframe activated\n");
    346 	else
    347 		panic("Serious error booting RAID!!\n");
    348 
    349 	/* put together some datastructures like the CCD device does.. This
    350 	 * lets us lock the device and what-not when it gets opened. */
    351 
    352 	raid_softc = (struct raid_softc *)
    353 		malloc(num * sizeof(struct raid_softc),
    354 		       M_RAIDFRAME, M_NOWAIT);
    355 	if (raid_softc == NULL) {
    356 		printf("WARNING: no memory for RAIDframe driver\n");
    357 		return;
    358 	}
    359 
    360 	bzero(raid_softc, num * sizeof(struct raid_softc));
    361 
    362 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    363 					      M_RAIDFRAME, M_NOWAIT);
    364 	if (raidrootdev == NULL) {
    365 		panic("No memory for RAIDframe driver!!?!?!\n");
    366 	}
    367 
    368 	for (raidID = 0; raidID < num; raidID++) {
    369 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    370 
    371 		raidrootdev[raidID].dv_class  = DV_DISK;
    372 		raidrootdev[raidID].dv_cfdata = NULL;
    373 		raidrootdev[raidID].dv_unit   = raidID;
    374 		raidrootdev[raidID].dv_parent = NULL;
    375 		raidrootdev[raidID].dv_flags  = 0;
    376 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    377 
    378 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    379 			  (RF_Raid_t *));
    380 		if (raidPtrs[raidID] == NULL) {
    381 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    382 			numraid = raidID;
    383 			return;
    384 		}
    385 	}
    386 
    387 if (raidautoconfig) {
    388 	/* 1. locate all RAID components on the system */
    389 
    390 #if DEBUG
    391 	printf("Searching for raid components...\n");
    392 #endif
    393 	ac_list = rf_find_raid_components();
    394 
    395 	/* 2. sort them into their respective sets */
    396 
    397 	config_sets = rf_create_auto_sets(ac_list);
    398 
    399 	/* 3. evaluate each set and configure the valid ones
    400 	   This gets done in rf_buildroothack() */
    401 
    402 	/* schedule the creation of the thread to do the
    403 	   "/ on RAID" stuff */
    404 
    405 	kthread_create(rf_buildroothack,config_sets);
    406 
    407 	/* 4. make sure we get our mud.. I mean root.. hooks in.. */
    408 	/* XXXX pick raid0 for now... and this should be only done
    409 	   if we find something that's bootable!!! */
    410 #if 0
    411 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    412 #endif
    413 	if (boothowto & RB_ASKNAME) {
    414 		/* We don't auto-config... */
    415 	} else {
    416 		/* They didn't ask, and we found something bootable... */
    417 		/* XXX pretend for now.. */
    418 #if 0
    419  		booted_device = &raidrootdev[0];
    420 #endif
    421 	}
    422 }
    423 
    424 }
    425 
    426 void
    427 rf_buildroothack(arg)
    428 	void *arg;
    429 {
    430 	RF_ConfigSet_t *config_sets = arg;
    431 	RF_ConfigSet_t *cset;
    432 	RF_ConfigSet_t *next_cset;
    433 	int retcode;
    434 	int raidID;
    435 	int rootID;
    436 	int num_root;
    437 
    438 	num_root = 0;
    439 	cset = config_sets;
    440 	while(cset != NULL ) {
    441 		next_cset = cset->next;
    442 		if (rf_have_enough_components(cset) &&
    443 		    cset->ac->clabel->autoconfigure==1) {
    444 			retcode = rf_auto_config_set(cset,&raidID);
    445 			if (!retcode) {
    446 				if (cset->rootable) {
    447 					rootID = raidID;
    448 					num_root++;
    449 				}
    450 			} else {
    451 				/* The autoconfig didn't work :( */
    452 #if DEBUG
    453 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    454 #endif
    455 				rf_release_all_vps(cset);
    456 #if DEBUG
    457 				printf("Done cleanup\n");
    458 #endif
    459 			}
    460 		} else {
    461 			/* we're not autoconfiguring this set...
    462 			   release the associated resources */
    463 #if DEBUG
    464 			printf("Releasing vp's\n");
    465 #endif
    466 			rf_release_all_vps(cset);
    467 #if DEBUG
    468 			printf("Done.\n");
    469 #endif
    470 		}
    471 		/* cleanup */
    472 #if DEBUG
    473 		printf("Cleaning up config set\n");
    474 #endif
    475 		rf_cleanup_config_set(cset);
    476 #if DEBUG
    477 		printf("Done cleanup\n");
    478 #endif
    479 		cset = next_cset;
    480 	}
    481 	if (boothowto & RB_ASKNAME) {
    482 		/* We don't auto-config... */
    483 	} else {
    484 		/* They didn't ask, and we found something bootable... */
    485 		/* XXX pretend for now.. */
    486 		if (num_root == 1) {
    487 #if 1
    488 			booted_device = &raidrootdev[rootID];
    489 #endif
    490 		} else if (num_root > 1) {
    491 			/* we can't guess.. require the user to answer... */
    492 			boothowto |= RB_ASKNAME;
    493 		}
    494 	}
    495 }
    496 
    497 
    498 int
    499 raidsize(dev)
    500 	dev_t   dev;
    501 {
    502 	struct raid_softc *rs;
    503 	struct disklabel *lp;
    504 	int     part, unit, omask, size;
    505 
    506 	unit = raidunit(dev);
    507 	if (unit >= numraid)
    508 		return (-1);
    509 	rs = &raid_softc[unit];
    510 
    511 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    512 		return (-1);
    513 
    514 	part = DISKPART(dev);
    515 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    516 	lp = rs->sc_dkdev.dk_label;
    517 
    518 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    519 		return (-1);
    520 
    521 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    522 		size = -1;
    523 	else
    524 		size = lp->d_partitions[part].p_size *
    525 		    (lp->d_secsize / DEV_BSIZE);
    526 
    527 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    528 		return (-1);
    529 
    530 	return (size);
    531 
    532 }
    533 
    534 int
    535 raiddump(dev, blkno, va, size)
    536 	dev_t   dev;
    537 	daddr_t blkno;
    538 	caddr_t va;
    539 	size_t  size;
    540 {
    541 	/* Not implemented. */
    542 	return ENXIO;
    543 }
    544 /* ARGSUSED */
    545 int
    546 raidopen(dev, flags, fmt, p)
    547 	dev_t   dev;
    548 	int     flags, fmt;
    549 	struct proc *p;
    550 {
    551 	int     unit = raidunit(dev);
    552 	struct raid_softc *rs;
    553 	struct disklabel *lp;
    554 	int     part, pmask;
    555 	int     error = 0;
    556 
    557 	if (unit >= numraid)
    558 		return (ENXIO);
    559 	rs = &raid_softc[unit];
    560 
    561 	if ((error = raidlock(rs)) != 0)
    562 		return (error);
    563 	lp = rs->sc_dkdev.dk_label;
    564 
    565 	part = DISKPART(dev);
    566 	pmask = (1 << part);
    567 
    568 	db1_printf(("Opening raid device number: %d partition: %d\n",
    569 		unit, part));
    570 
    571 
    572 	if ((rs->sc_flags & RAIDF_INITED) &&
    573 	    (rs->sc_dkdev.dk_openmask == 0))
    574 		raidgetdisklabel(dev);
    575 
    576 	/* make sure that this partition exists */
    577 
    578 	if (part != RAW_PART) {
    579 		db1_printf(("Not a raw partition..\n"));
    580 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    581 		    ((part >= lp->d_npartitions) ||
    582 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    583 			error = ENXIO;
    584 			raidunlock(rs);
    585 			db1_printf(("Bailing out...\n"));
    586 			return (error);
    587 		}
    588 	}
    589 	/* Prevent this unit from being unconfigured while open. */
    590 	switch (fmt) {
    591 	case S_IFCHR:
    592 		rs->sc_dkdev.dk_copenmask |= pmask;
    593 		break;
    594 
    595 	case S_IFBLK:
    596 		rs->sc_dkdev.dk_bopenmask |= pmask;
    597 		break;
    598 	}
    599 
    600 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    601 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    602 		/* First one... mark things as dirty... Note that we *MUST*
    603 		 have done a configure before this.  I DO NOT WANT TO BE
    604 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    605 		 THAT THEY BELONG TOGETHER!!!!! */
    606 		/* XXX should check to see if we're only open for reading
    607 		   here... If so, we needn't do this, but then need some
    608 		   other way of keeping track of what's happened.. */
    609 
    610 		rf_markalldirty( raidPtrs[unit] );
    611 	}
    612 
    613 
    614 	rs->sc_dkdev.dk_openmask =
    615 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    616 
    617 	raidunlock(rs);
    618 
    619 	return (error);
    620 
    621 
    622 }
    623 /* ARGSUSED */
    624 int
    625 raidclose(dev, flags, fmt, p)
    626 	dev_t   dev;
    627 	int     flags, fmt;
    628 	struct proc *p;
    629 {
    630 	int     unit = raidunit(dev);
    631 	struct raid_softc *rs;
    632 	int     error = 0;
    633 	int     part;
    634 
    635 	if (unit >= numraid)
    636 		return (ENXIO);
    637 	rs = &raid_softc[unit];
    638 
    639 	if ((error = raidlock(rs)) != 0)
    640 		return (error);
    641 
    642 	part = DISKPART(dev);
    643 
    644 	/* ...that much closer to allowing unconfiguration... */
    645 	switch (fmt) {
    646 	case S_IFCHR:
    647 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    648 		break;
    649 
    650 	case S_IFBLK:
    651 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    652 		break;
    653 	}
    654 	rs->sc_dkdev.dk_openmask =
    655 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    656 
    657 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    658 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    659 		/* Last one... device is not unconfigured yet.
    660 		   Device shutdown has taken care of setting the
    661 		   clean bits if RAIDF_INITED is not set
    662 		   mark things as clean... */
    663 		rf_update_component_labels( raidPtrs[unit] );
    664 	}
    665 
    666 	raidunlock(rs);
    667 	return (0);
    668 
    669 }
    670 
    671 void
    672 raidstrategy(bp)
    673 	register struct buf *bp;
    674 {
    675 	register int s;
    676 
    677 	unsigned int raidID = raidunit(bp->b_dev);
    678 	RF_Raid_t *raidPtr;
    679 	struct raid_softc *rs = &raid_softc[raidID];
    680 	struct disklabel *lp;
    681 	int     wlabel;
    682 
    683 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    684 		bp->b_error = ENXIO;
    685 		bp->b_flags = B_ERROR;
    686 		bp->b_resid = bp->b_bcount;
    687 		biodone(bp);
    688 		return;
    689 	}
    690 	if (raidID >= numraid || !raidPtrs[raidID]) {
    691 		bp->b_error = ENODEV;
    692 		bp->b_flags |= B_ERROR;
    693 		bp->b_resid = bp->b_bcount;
    694 		biodone(bp);
    695 		return;
    696 	}
    697 	raidPtr = raidPtrs[raidID];
    698 	if (!raidPtr->valid) {
    699 		bp->b_error = ENODEV;
    700 		bp->b_flags |= B_ERROR;
    701 		bp->b_resid = bp->b_bcount;
    702 		biodone(bp);
    703 		return;
    704 	}
    705 	if (bp->b_bcount == 0) {
    706 		db1_printf(("b_bcount is zero..\n"));
    707 		biodone(bp);
    708 		return;
    709 	}
    710 	lp = rs->sc_dkdev.dk_label;
    711 
    712 	/*
    713 	 * Do bounds checking and adjust transfer.  If there's an
    714 	 * error, the bounds check will flag that for us.
    715 	 */
    716 
    717 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    718 	if (DISKPART(bp->b_dev) != RAW_PART)
    719 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    720 			db1_printf(("Bounds check failed!!:%d %d\n",
    721 				(int) bp->b_blkno, (int) wlabel));
    722 			biodone(bp);
    723 			return;
    724 		}
    725 	s = splbio();
    726 
    727 	bp->b_resid = 0;
    728 
    729 	/* stuff it onto our queue */
    730 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    731 
    732 	raidstart(raidPtrs[raidID]);
    733 
    734 	splx(s);
    735 }
    736 /* ARGSUSED */
    737 int
    738 raidread(dev, uio, flags)
    739 	dev_t   dev;
    740 	struct uio *uio;
    741 	int     flags;
    742 {
    743 	int     unit = raidunit(dev);
    744 	struct raid_softc *rs;
    745 	int     part;
    746 
    747 	if (unit >= numraid)
    748 		return (ENXIO);
    749 	rs = &raid_softc[unit];
    750 
    751 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    752 		return (ENXIO);
    753 	part = DISKPART(dev);
    754 
    755 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    756 
    757 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    758 
    759 }
    760 /* ARGSUSED */
    761 int
    762 raidwrite(dev, uio, flags)
    763 	dev_t   dev;
    764 	struct uio *uio;
    765 	int     flags;
    766 {
    767 	int     unit = raidunit(dev);
    768 	struct raid_softc *rs;
    769 
    770 	if (unit >= numraid)
    771 		return (ENXIO);
    772 	rs = &raid_softc[unit];
    773 
    774 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    775 		return (ENXIO);
    776 	db1_printf(("raidwrite\n"));
    777 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    778 
    779 }
    780 
    781 int
    782 raidioctl(dev, cmd, data, flag, p)
    783 	dev_t   dev;
    784 	u_long  cmd;
    785 	caddr_t data;
    786 	int     flag;
    787 	struct proc *p;
    788 {
    789 	int     unit = raidunit(dev);
    790 	int     error = 0;
    791 	int     part, pmask;
    792 	struct raid_softc *rs;
    793 	RF_Config_t *k_cfg, *u_cfg;
    794 	RF_Raid_t *raidPtr;
    795 	RF_RaidDisk_t *diskPtr;
    796 	RF_AccTotals_t *totals;
    797 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    798 	u_char *specific_buf;
    799 	int retcode = 0;
    800 	int row;
    801 	int column;
    802 	struct rf_recon_req *rrcopy, *rr;
    803 	RF_ComponentLabel_t *clabel;
    804 	RF_ComponentLabel_t ci_label;
    805 	RF_ComponentLabel_t **clabel_ptr;
    806 	RF_SingleComponent_t *sparePtr,*componentPtr;
    807 	RF_SingleComponent_t hot_spare;
    808 	RF_SingleComponent_t component;
    809 	int i, j, d;
    810 
    811 	if (unit >= numraid)
    812 		return (ENXIO);
    813 	rs = &raid_softc[unit];
    814 	raidPtr = raidPtrs[unit];
    815 
    816 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    817 		(int) DISKPART(dev), (int) unit, (int) cmd));
    818 
    819 	/* Must be open for writes for these commands... */
    820 	switch (cmd) {
    821 	case DIOCSDINFO:
    822 	case DIOCWDINFO:
    823 	case DIOCWLABEL:
    824 		if ((flag & FWRITE) == 0)
    825 			return (EBADF);
    826 	}
    827 
    828 	/* Must be initialized for these... */
    829 	switch (cmd) {
    830 	case DIOCGDINFO:
    831 	case DIOCSDINFO:
    832 	case DIOCWDINFO:
    833 	case DIOCGPART:
    834 	case DIOCWLABEL:
    835 	case DIOCGDEFLABEL:
    836 	case RAIDFRAME_SHUTDOWN:
    837 	case RAIDFRAME_REWRITEPARITY:
    838 	case RAIDFRAME_GET_INFO:
    839 	case RAIDFRAME_RESET_ACCTOTALS:
    840 	case RAIDFRAME_GET_ACCTOTALS:
    841 	case RAIDFRAME_KEEP_ACCTOTALS:
    842 	case RAIDFRAME_GET_SIZE:
    843 	case RAIDFRAME_FAIL_DISK:
    844 	case RAIDFRAME_COPYBACK:
    845 	case RAIDFRAME_CHECK_RECON_STATUS:
    846 	case RAIDFRAME_GET_COMPONENT_LABEL:
    847 	case RAIDFRAME_SET_COMPONENT_LABEL:
    848 	case RAIDFRAME_ADD_HOT_SPARE:
    849 	case RAIDFRAME_REMOVE_HOT_SPARE:
    850 	case RAIDFRAME_INIT_LABELS:
    851 	case RAIDFRAME_REBUILD_IN_PLACE:
    852 	case RAIDFRAME_CHECK_PARITY:
    853 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    854 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    855 	case RAIDFRAME_SET_AUTOCONFIG:
    856 	case RAIDFRAME_SET_ROOT:
    857 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    858 			return (ENXIO);
    859 	}
    860 
    861 	switch (cmd) {
    862 
    863 		/* configure the system */
    864 	case RAIDFRAME_CONFIGURE:
    865 
    866 		if (raidPtr->valid) {
    867 			/* There is a valid RAID set running on this unit! */
    868 			printf("raid%d: Device already configured!\n",unit);
    869 		}
    870 
    871 		/* copy-in the configuration information */
    872 		/* data points to a pointer to the configuration structure */
    873 
    874 		u_cfg = *((RF_Config_t **) data);
    875 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    876 		if (k_cfg == NULL) {
    877 			return (ENOMEM);
    878 		}
    879 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    880 		    sizeof(RF_Config_t));
    881 		if (retcode) {
    882 			RF_Free(k_cfg, sizeof(RF_Config_t));
    883 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    884 				retcode));
    885 			return (retcode);
    886 		}
    887 		/* allocate a buffer for the layout-specific data, and copy it
    888 		 * in */
    889 		if (k_cfg->layoutSpecificSize) {
    890 			if (k_cfg->layoutSpecificSize > 10000) {
    891 				/* sanity check */
    892 				RF_Free(k_cfg, sizeof(RF_Config_t));
    893 				return (EINVAL);
    894 			}
    895 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    896 			    (u_char *));
    897 			if (specific_buf == NULL) {
    898 				RF_Free(k_cfg, sizeof(RF_Config_t));
    899 				return (ENOMEM);
    900 			}
    901 			retcode = copyin(k_cfg->layoutSpecific,
    902 			    (caddr_t) specific_buf,
    903 			    k_cfg->layoutSpecificSize);
    904 			if (retcode) {
    905 				RF_Free(k_cfg, sizeof(RF_Config_t));
    906 				RF_Free(specific_buf,
    907 					k_cfg->layoutSpecificSize);
    908 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    909 					retcode));
    910 				return (retcode);
    911 			}
    912 		} else
    913 			specific_buf = NULL;
    914 		k_cfg->layoutSpecific = specific_buf;
    915 
    916 		/* should do some kind of sanity check on the configuration.
    917 		 * Store the sum of all the bytes in the last byte? */
    918 
    919 		/* configure the system */
    920 
    921 		/*
    922 		 * Clear the entire RAID descriptor, just to make sure
    923 		 *  there is no stale data left in the case of a
    924 		 *  reconfiguration
    925 		 */
    926 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    927 		raidPtr->raidid = unit;
    928 
    929 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    930 
    931 		if (retcode == 0) {
    932 
    933 			/* allow this many simultaneous IO's to
    934 			   this RAID device */
    935 			raidPtr->openings = RAIDOUTSTANDING;
    936 
    937 			retcode = raidinit(dev, raidPtr, unit);
    938 			rf_markalldirty( raidPtr );
    939 		}
    940 		/* free the buffers.  No return code here. */
    941 		if (k_cfg->layoutSpecificSize) {
    942 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    943 		}
    944 		RF_Free(k_cfg, sizeof(RF_Config_t));
    945 
    946 		return (retcode);
    947 
    948 		/* shutdown the system */
    949 	case RAIDFRAME_SHUTDOWN:
    950 
    951 		if ((error = raidlock(rs)) != 0)
    952 			return (error);
    953 
    954 		/*
    955 		 * If somebody has a partition mounted, we shouldn't
    956 		 * shutdown.
    957 		 */
    958 
    959 		part = DISKPART(dev);
    960 		pmask = (1 << part);
    961 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    962 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    963 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    964 			raidunlock(rs);
    965 			return (EBUSY);
    966 		}
    967 
    968 		retcode = rf_Shutdown(raidPtr);
    969 
    970 		pool_destroy(&rs->sc_cbufpool);
    971 
    972 		/* It's no longer initialized... */
    973 		rs->sc_flags &= ~RAIDF_INITED;
    974 
    975 		/* Detach the disk. */
    976 		disk_detach(&rs->sc_dkdev);
    977 
    978 		raidunlock(rs);
    979 
    980 		return (retcode);
    981 	case RAIDFRAME_GET_COMPONENT_LABEL:
    982 		clabel_ptr = (RF_ComponentLabel_t **) data;
    983 		/* need to read the component label for the disk indicated
    984 		   by row,column in clabel */
    985 
    986 		/* For practice, let's get it directly from disk, rather
    987 		   than from the in-core copy */
    988 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    989 			   (RF_ComponentLabel_t *));
    990 		if (clabel == NULL)
    991 			return (ENOMEM);
    992 
    993 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    994 
    995 		retcode = copyin( *clabel_ptr, clabel,
    996 				  sizeof(RF_ComponentLabel_t));
    997 
    998 		if (retcode) {
    999 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1000 			return(retcode);
   1001 		}
   1002 
   1003 		row = clabel->row;
   1004 		column = clabel->column;
   1005 
   1006 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1007 		    (column < 0) || (column >= raidPtr->numCol)) {
   1008 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1009 			return(EINVAL);
   1010 		}
   1011 
   1012 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1013 				raidPtr->raid_cinfo[row][column].ci_vp,
   1014 				clabel );
   1015 
   1016 		retcode = copyout((caddr_t) clabel,
   1017 				  (caddr_t) *clabel_ptr,
   1018 				  sizeof(RF_ComponentLabel_t));
   1019 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1020 		return (retcode);
   1021 
   1022 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1023 		clabel = (RF_ComponentLabel_t *) data;
   1024 
   1025 		/* XXX check the label for valid stuff... */
   1026 		/* Note that some things *should not* get modified --
   1027 		   the user should be re-initing the labels instead of
   1028 		   trying to patch things.
   1029 		   */
   1030 
   1031 		printf("Got component label:\n");
   1032 		printf("Version: %d\n",clabel->version);
   1033 		printf("Serial Number: %d\n",clabel->serial_number);
   1034 		printf("Mod counter: %d\n",clabel->mod_counter);
   1035 		printf("Row: %d\n", clabel->row);
   1036 		printf("Column: %d\n", clabel->column);
   1037 		printf("Num Rows: %d\n", clabel->num_rows);
   1038 		printf("Num Columns: %d\n", clabel->num_columns);
   1039 		printf("Clean: %d\n", clabel->clean);
   1040 		printf("Status: %d\n", clabel->status);
   1041 
   1042 		row = clabel->row;
   1043 		column = clabel->column;
   1044 
   1045 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1046 		    (column < 0) || (column >= raidPtr->numCol)) {
   1047 			return(EINVAL);
   1048 		}
   1049 
   1050 		/* XXX this isn't allowed to do anything for now :-) */
   1051 
   1052 		/* XXX and before it is, we need to fill in the rest
   1053 		   of the fields!?!?!?! */
   1054 #if 0
   1055 		raidwrite_component_label(
   1056                             raidPtr->Disks[row][column].dev,
   1057 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1058 			    clabel );
   1059 #endif
   1060 		return (0);
   1061 
   1062 	case RAIDFRAME_INIT_LABELS:
   1063 		clabel = (RF_ComponentLabel_t *) data;
   1064 		/*
   1065 		   we only want the serial number from
   1066 		   the above.  We get all the rest of the information
   1067 		   from the config that was used to create this RAID
   1068 		   set.
   1069 		   */
   1070 
   1071 		raidPtr->serial_number = clabel->serial_number;
   1072 
   1073 		raid_init_component_label(raidPtr, &ci_label);
   1074 		ci_label.serial_number = clabel->serial_number;
   1075 
   1076 		for(row=0;row<raidPtr->numRow;row++) {
   1077 			ci_label.row = row;
   1078 			for(column=0;column<raidPtr->numCol;column++) {
   1079 				diskPtr = &raidPtr->Disks[row][column];
   1080 				ci_label.blockSize = diskPtr->blockSize;
   1081 				ci_label.numBlocks = diskPtr->numBlocks;
   1082 				ci_label.partitionSize = diskPtr->partitionSize;
   1083 				ci_label.column = column;
   1084 				raidwrite_component_label(
   1085 				  raidPtr->Disks[row][column].dev,
   1086 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1087 				  &ci_label );
   1088 			}
   1089 		}
   1090 
   1091 		return (retcode);
   1092 	case RAIDFRAME_SET_AUTOCONFIG:
   1093 		d = rf_set_autoconfig(raidPtr, *data);
   1094 		printf("New autoconfig value is: %d\n", d);
   1095 		*data = d;
   1096 		return (retcode);
   1097 
   1098 	case RAIDFRAME_SET_ROOT:
   1099 		d = rf_set_rootpartition(raidPtr, *data);
   1100 		printf("New rootpartition value is: %d\n", d);
   1101 		*data = d;
   1102 		return (retcode);
   1103 
   1104 		/* initialize all parity */
   1105 	case RAIDFRAME_REWRITEPARITY:
   1106 
   1107 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1108 			/* Parity for RAID 0 is trivially correct */
   1109 			raidPtr->parity_good = RF_RAID_CLEAN;
   1110 			return(0);
   1111 		}
   1112 
   1113 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1114 			/* Re-write is already in progress! */
   1115 			return(EINVAL);
   1116 		}
   1117 
   1118 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1119 					   rf_RewriteParityThread,
   1120 					   raidPtr,"raid_parity");
   1121 		return (retcode);
   1122 
   1123 
   1124 	case RAIDFRAME_ADD_HOT_SPARE:
   1125 		sparePtr = (RF_SingleComponent_t *) data;
   1126 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1127 		printf("Adding spare\n");
   1128 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1129 		return(retcode);
   1130 
   1131 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1132 		return(retcode);
   1133 
   1134 	case RAIDFRAME_REBUILD_IN_PLACE:
   1135 
   1136 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1137 			/* Can't do this on a RAID 0!! */
   1138 			return(EINVAL);
   1139 		}
   1140 
   1141 		if (raidPtr->recon_in_progress == 1) {
   1142 			/* a reconstruct is already in progress! */
   1143 			return(EINVAL);
   1144 		}
   1145 
   1146 		componentPtr = (RF_SingleComponent_t *) data;
   1147 		memcpy( &component, componentPtr,
   1148 			sizeof(RF_SingleComponent_t));
   1149 		row = component.row;
   1150 		column = component.column;
   1151 		printf("Rebuild: %d %d\n",row, column);
   1152 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1153 		    (column < 0) || (column >= raidPtr->numCol)) {
   1154 			return(EINVAL);
   1155 		}
   1156 
   1157 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1158 		if (rrcopy == NULL)
   1159 			return(ENOMEM);
   1160 
   1161 		rrcopy->raidPtr = (void *) raidPtr;
   1162 		rrcopy->row = row;
   1163 		rrcopy->col = column;
   1164 
   1165 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1166 					   rf_ReconstructInPlaceThread,
   1167 					   rrcopy,"raid_reconip");
   1168 		return(retcode);
   1169 
   1170 	case RAIDFRAME_GET_INFO:
   1171 		if (!raidPtr->valid)
   1172 			return (ENODEV);
   1173 		ucfgp = (RF_DeviceConfig_t **) data;
   1174 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1175 			  (RF_DeviceConfig_t *));
   1176 		if (d_cfg == NULL)
   1177 			return (ENOMEM);
   1178 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1179 		d_cfg->rows = raidPtr->numRow;
   1180 		d_cfg->cols = raidPtr->numCol;
   1181 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1182 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1183 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1184 			return (ENOMEM);
   1185 		}
   1186 		d_cfg->nspares = raidPtr->numSpare;
   1187 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1188 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1189 			return (ENOMEM);
   1190 		}
   1191 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1192 		d = 0;
   1193 		for (i = 0; i < d_cfg->rows; i++) {
   1194 			for (j = 0; j < d_cfg->cols; j++) {
   1195 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1196 				d++;
   1197 			}
   1198 		}
   1199 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1200 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1201 		}
   1202 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1203 				  sizeof(RF_DeviceConfig_t));
   1204 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1205 
   1206 		return (retcode);
   1207 
   1208 	case RAIDFRAME_CHECK_PARITY:
   1209 		*(int *) data = raidPtr->parity_good;
   1210 		return (0);
   1211 
   1212 	case RAIDFRAME_RESET_ACCTOTALS:
   1213 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1214 		return (0);
   1215 
   1216 	case RAIDFRAME_GET_ACCTOTALS:
   1217 		totals = (RF_AccTotals_t *) data;
   1218 		*totals = raidPtr->acc_totals;
   1219 		return (0);
   1220 
   1221 	case RAIDFRAME_KEEP_ACCTOTALS:
   1222 		raidPtr->keep_acc_totals = *(int *)data;
   1223 		return (0);
   1224 
   1225 	case RAIDFRAME_GET_SIZE:
   1226 		*(int *) data = raidPtr->totalSectors;
   1227 		return (0);
   1228 
   1229 		/* fail a disk & optionally start reconstruction */
   1230 	case RAIDFRAME_FAIL_DISK:
   1231 
   1232 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1233 			/* Can't do this on a RAID 0!! */
   1234 			return(EINVAL);
   1235 		}
   1236 
   1237 		rr = (struct rf_recon_req *) data;
   1238 
   1239 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1240 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1241 			return (EINVAL);
   1242 
   1243 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1244 		       unit, rr->row, rr->col);
   1245 
   1246 		/* make a copy of the recon request so that we don't rely on
   1247 		 * the user's buffer */
   1248 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1249 		if (rrcopy == NULL)
   1250 			return(ENOMEM);
   1251 		bcopy(rr, rrcopy, sizeof(*rr));
   1252 		rrcopy->raidPtr = (void *) raidPtr;
   1253 
   1254 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1255 					   rf_ReconThread,
   1256 					   rrcopy,"raid_recon");
   1257 		return (0);
   1258 
   1259 		/* invoke a copyback operation after recon on whatever disk
   1260 		 * needs it, if any */
   1261 	case RAIDFRAME_COPYBACK:
   1262 
   1263 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1264 			/* This makes no sense on a RAID 0!! */
   1265 			return(EINVAL);
   1266 		}
   1267 
   1268 		if (raidPtr->copyback_in_progress == 1) {
   1269 			/* Copyback is already in progress! */
   1270 			return(EINVAL);
   1271 		}
   1272 
   1273 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1274 					   rf_CopybackThread,
   1275 					   raidPtr,"raid_copyback");
   1276 		return (retcode);
   1277 
   1278 		/* return the percentage completion of reconstruction */
   1279 	case RAIDFRAME_CHECK_RECON_STATUS:
   1280 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1281 			/* This makes no sense on a RAID 0 */
   1282 			return(EINVAL);
   1283 		}
   1284 		row = 0; /* XXX we only consider a single row... */
   1285 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1286 			*(int *) data = 100;
   1287 		else
   1288 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1289 		return (0);
   1290 
   1291 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1292 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1293 			/* This makes no sense on a RAID 0 */
   1294 			return(EINVAL);
   1295 		}
   1296 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1297 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1298 		} else {
   1299 			*(int *) data = 100;
   1300 		}
   1301 		return (0);
   1302 
   1303 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1304 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1305 			/* This makes no sense on a RAID 0 */
   1306 			return(EINVAL);
   1307 		}
   1308 		if (raidPtr->copyback_in_progress == 1) {
   1309 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1310 				raidPtr->Layout.numStripe;
   1311 		} else {
   1312 			*(int *) data = 100;
   1313 		}
   1314 		return (0);
   1315 
   1316 
   1317 		/* the sparetable daemon calls this to wait for the kernel to
   1318 		 * need a spare table. this ioctl does not return until a
   1319 		 * spare table is needed. XXX -- calling mpsleep here in the
   1320 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1321 		 * -- I should either compute the spare table in the kernel,
   1322 		 * or have a different -- XXX XXX -- interface (a different
   1323 		 * character device) for delivering the table     -- XXX */
   1324 #if 0
   1325 	case RAIDFRAME_SPARET_WAIT:
   1326 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1327 		while (!rf_sparet_wait_queue)
   1328 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1329 		waitreq = rf_sparet_wait_queue;
   1330 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1331 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1332 
   1333 		/* structure assignment */
   1334 		*((RF_SparetWait_t *) data) = *waitreq;
   1335 
   1336 		RF_Free(waitreq, sizeof(*waitreq));
   1337 		return (0);
   1338 
   1339 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1340 		 * code in it that will cause the daemon to exit */
   1341 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1342 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1343 		waitreq->fcol = -1;
   1344 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1345 		waitreq->next = rf_sparet_wait_queue;
   1346 		rf_sparet_wait_queue = waitreq;
   1347 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1348 		wakeup(&rf_sparet_wait_queue);
   1349 		return (0);
   1350 
   1351 		/* used by the spare table daemon to deliver a spare table
   1352 		 * into the kernel */
   1353 	case RAIDFRAME_SEND_SPARET:
   1354 
   1355 		/* install the spare table */
   1356 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1357 
   1358 		/* respond to the requestor.  the return status of the spare
   1359 		 * table installation is passed in the "fcol" field */
   1360 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1361 		waitreq->fcol = retcode;
   1362 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1363 		waitreq->next = rf_sparet_resp_queue;
   1364 		rf_sparet_resp_queue = waitreq;
   1365 		wakeup(&rf_sparet_resp_queue);
   1366 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1367 
   1368 		return (retcode);
   1369 #endif
   1370 
   1371 	default:
   1372 		break; /* fall through to the os-specific code below */
   1373 
   1374 	}
   1375 
   1376 	if (!raidPtr->valid)
   1377 		return (EINVAL);
   1378 
   1379 	/*
   1380 	 * Add support for "regular" device ioctls here.
   1381 	 */
   1382 
   1383 	switch (cmd) {
   1384 	case DIOCGDINFO:
   1385 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1386 		break;
   1387 
   1388 	case DIOCGPART:
   1389 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1390 		((struct partinfo *) data)->part =
   1391 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1392 		break;
   1393 
   1394 	case DIOCWDINFO:
   1395 	case DIOCSDINFO:
   1396 		if ((error = raidlock(rs)) != 0)
   1397 			return (error);
   1398 
   1399 		rs->sc_flags |= RAIDF_LABELLING;
   1400 
   1401 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1402 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1403 		if (error == 0) {
   1404 			if (cmd == DIOCWDINFO)
   1405 				error = writedisklabel(RAIDLABELDEV(dev),
   1406 				    raidstrategy, rs->sc_dkdev.dk_label,
   1407 				    rs->sc_dkdev.dk_cpulabel);
   1408 		}
   1409 		rs->sc_flags &= ~RAIDF_LABELLING;
   1410 
   1411 		raidunlock(rs);
   1412 
   1413 		if (error)
   1414 			return (error);
   1415 		break;
   1416 
   1417 	case DIOCWLABEL:
   1418 		if (*(int *) data != 0)
   1419 			rs->sc_flags |= RAIDF_WLABEL;
   1420 		else
   1421 			rs->sc_flags &= ~RAIDF_WLABEL;
   1422 		break;
   1423 
   1424 	case DIOCGDEFLABEL:
   1425 		raidgetdefaultlabel(raidPtr, rs,
   1426 		    (struct disklabel *) data);
   1427 		break;
   1428 
   1429 	default:
   1430 		retcode = ENOTTY;
   1431 	}
   1432 	return (retcode);
   1433 
   1434 }
   1435 
   1436 
   1437 /* raidinit -- complete the rest of the initialization for the
   1438    RAIDframe device.  */
   1439 
   1440 
   1441 static int
   1442 raidinit(dev, raidPtr, unit)
   1443 	dev_t   dev;
   1444 	RF_Raid_t *raidPtr;
   1445 	int     unit;
   1446 {
   1447 	int     retcode;
   1448 	struct raid_softc *rs;
   1449 
   1450 	retcode = 0;
   1451 
   1452 	rs = &raid_softc[unit];
   1453 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1454 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1455 
   1456 
   1457 	/* XXX should check return code first... */
   1458 	rs->sc_flags |= RAIDF_INITED;
   1459 
   1460 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1461 
   1462 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1463 
   1464 	/* disk_attach actually creates space for the CPU disklabel, among
   1465 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1466 	 * with disklabels. */
   1467 
   1468 	disk_attach(&rs->sc_dkdev);
   1469 
   1470 	/* XXX There may be a weird interaction here between this, and
   1471 	 * protectedSectors, as used in RAIDframe.  */
   1472 
   1473 	rs->sc_size = raidPtr->totalSectors;
   1474 	rs->sc_dev = dev;
   1475 
   1476 	return (retcode);
   1477 }
   1478 
   1479 /* wake up the daemon & tell it to get us a spare table
   1480  * XXX
   1481  * the entries in the queues should be tagged with the raidPtr
   1482  * so that in the extremely rare case that two recons happen at once,
   1483  * we know for which device we're requesting a spare table
   1484  * XXX
   1485  *
   1486  * XXX This code is not currently used. GO
         *
         * req is pushed onto the head of rf_sparet_wait_queue and sleepers on
         * that queue are woken.  The caller then sleeps until
         * rf_sparet_resp_queue is non-empty, removes the entry at its head,
         * and returns that entry's fcol field.  The dequeued response entry
         * (which req has been re-pointed at) is freed before returning.
   1487  */
   1488 int
   1489 rf_GetSpareTableFromDaemon(req)
   1490 	RF_SparetWait_t *req;
   1491 {
   1492 	int     retcode;
   1493 
   1494 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
        	/* queue the request at the head of the wait list and wake the
        	 * daemon side sleeping on it */
   1495 	req->next = rf_sparet_wait_queue;
   1496 	rf_sparet_wait_queue = req;
   1497 	wakeup(&rf_sparet_wait_queue);
   1498 
   1499 	/* mpsleep unlocks the mutex
        	 * NOTE(review): the call below is tsleep(), which is not passed
        	 * rf_sparet_wait_mutex -- confirm whether the mutex is really
        	 * released while this thread sleeps. */
   1500 	while (!rf_sparet_resp_queue) {
   1501 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1502 		    "raidframe getsparetable", 0);
   1503 	}
        	/* take the response off the head of the response list; from here
        	 * on req refers to that response, not to the caller's request */
   1504 	req = rf_sparet_resp_queue;
   1505 	rf_sparet_resp_queue = req->next;
   1506 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1507 
        	/* the status is carried in the response's fcol field */
   1508 	retcode = req->fcol;
   1509 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1510 					 * alloc'd */
   1511 	return (retcode);
   1512 }
   1513 
   1514 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1515  * bp & passes it down.
   1516  * any calls originating in the kernel must use non-blocking I/O
   1517  * do some extra sanity checking to return "appropriate" error values for
   1518  * certain conditions (to make some standard utilities work)
   1519  *
   1520  * Formerly known as: rf_DoAccessKernel
         *
         * Loops while raidPtr->openings is positive.  Each pass removes the
         * first buffer from the unit's buf_queue, converts its partition-
         * relative block number to an absolute one, and validates the range:
         * a transfer whose end lies beyond totalSectors is completed with
         * ENOSPC, and a byte count that is not a multiple of the sector size
         * is completed with EINVAL.  Otherwise openings is decremented and
         * the request is passed to rf_DoAccess() as an asynchronous,
         * non-blocking access.  Returns when the queue is empty or openings
         * reaches zero.
         *
         * raidPtr->mutex is held only around the tests and updates of
         * openings; it is released for the body of each iteration.
   1521  */
   1522 void
   1523 raidstart(raidPtr)
   1524 	RF_Raid_t *raidPtr;
   1525 {
   1526 	RF_SectorCount_t num_blocks, pb, sum;
   1527 	RF_RaidAddr_t raid_addr;
   1528 	int     retcode;
   1529 	struct partition *pp;
   1530 	daddr_t blocknum;
   1531 	int     unit;
   1532 	struct raid_softc *rs;
   1533 	int     do_async;
   1534 	struct buf *bp;
   1535 
   1536 	unit = raidPtr->raidid;
   1537 	rs = &raid_softc[unit];
   1538 
   1539 	/* Check to see if we're at the limit... */
   1540 	RF_LOCK_MUTEX(raidPtr->mutex);
   1541 	while (raidPtr->openings > 0) {
        		/* the loop condition was evaluated under the mutex; drop it
        		 * for the rest of this iteration */
   1542 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1543 
   1544 		/* get the next item, if any, from the queue */
   1545 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1546 			/* nothing more to do */
        			/* (the mutex was already released above) */
   1547 			return;
   1548 		}
   1549 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1550 
   1551 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1552 		 * partition.. Need to make it absolute to the underlying
   1553 		 * device.. */
   1554 
   1555 		blocknum = bp->b_blkno;
        		/* the raw partition gets no offset applied */
   1556 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1557 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1558 			blocknum += pp->p_offset;
   1559 		}
   1560 
   1561 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1562 			    (int) blocknum));
   1563 
   1564 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1565 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1566 
   1567 		/* *THIS* is where we adjust what block we're going to...
   1568 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1569 		raid_addr = blocknum;
   1570 
        		/* num_blocks: whole sectors in the transfer; pb: 1 if the byte
        		 * count leaves a partial trailing sector, else 0; sum: first
        		 * sector past the end of the transfer */
   1571 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1572 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1573 		sum = raid_addr + num_blocks + pb;
        		/* the leading "1 ||" makes this condition always true */
   1574 		if (1 || rf_debugKernelAccess) {
   1575 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1576 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1577 				    (int) pb, (int) bp->b_resid));
   1578 		}
        		/* fail the buffer with ENOSPC if it ends beyond the array, or
        		 * if sum came out smaller than one of its addends (i.e. the
        		 * addition wrapped) */
   1579 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1580 		    || (sum < num_blocks) || (sum < pb)) {
   1581 			bp->b_error = ENOSPC;
   1582 			bp->b_flags |= B_ERROR;
   1583 			bp->b_resid = bp->b_bcount;
   1584 			biodone(bp);
        			/* re-take the mutex: the while condition reads openings */
   1585 			RF_LOCK_MUTEX(raidPtr->mutex);
   1586 			continue;
   1587 		}
   1588 		/*
   1589 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1590 		 */
   1591 
        		/* byte counts that are not a whole number of sectors are
        		 * rejected with EINVAL */
   1592 		if (bp->b_bcount & raidPtr->sectorMask) {
   1593 			bp->b_error = EINVAL;
   1594 			bp->b_flags |= B_ERROR;
   1595 			bp->b_resid = bp->b_bcount;
   1596 			biodone(bp);
        			/* re-take the mutex: the while condition reads openings */
   1597 			RF_LOCK_MUTEX(raidPtr->mutex);
   1598 			continue;
   1599 
   1600 		}
   1601 		db1_printf(("Calling DoAccess..\n"));
   1602 
   1603 
        		/* claim one opening for the access about to be issued */
   1604 		RF_LOCK_MUTEX(raidPtr->mutex);
   1605 		raidPtr->openings--;
   1606 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1607 
   1608 		/*
   1609 		 * Everything is async.
   1610 		 */
   1611 		do_async = 1;
   1612 
   1613 		/* don't ever condition on bp->b_flags & B_WRITE.
   1614 		 * always condition on B_READ instead */
   1615 
   1616 		/* XXX we're still at splbio() here... do we *really*
   1617 		   need to be? */
   1618 
   1619 
        		/* NOTE(review): retcode is assigned here but not examined
        		 * anywhere later in this function -- confirm how a failing
        		 * rf_DoAccess() is expected to complete bp and give back the
        		 * opening claimed above. */
   1620 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1621 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1622 				      do_async, raid_addr, num_blocks,
   1623 				      bp->b_un.b_addr, bp, NULL, NULL,
   1624 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1625 
   1626 
        		/* hold the mutex for the next evaluation of the while test */
   1627 		RF_LOCK_MUTEX(raidPtr->mutex);
   1628 	}
   1629 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1630 }
   1631 
   1632 
   1633 
   1634 
   1635 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
        /*
         * queue: disk queue of the component the request is issued on; its
         *        raidPtr supplies the unit number and sector-size shift, and
         *        its rf_cinfo supplies the vnode and device of the component.
         * req:   the request to dispatch; req->bp is the originating buffer.
         *
         * A raidbuf wrapper is obtained with RAIDGETBUF and tied to req and
         * to the originating buffer so the completion callback can find
         * them.  Reads and writes are set up with InitBP() and handed to
         * VOP_STRATEGY() with KernelWakeupFunc as the completion callback;
         * an RF_IO_TYPE_NOP request calls KernelWakeupFunc directly.
         *
         * Returns 0.  Panics if the unit number is >= numraid or if
         * req->type is not one of NOP/READ/WRITE.
         */
   1636 
   1637 int
   1638 rf_DispatchKernelIO(queue, req)
   1639 	RF_DiskQueue_t *queue;
   1640 	RF_DiskQueueData_t *req;
   1641 {
   1642 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1643 	struct buf *bp;
   1644 	struct raidbuf *raidbp = NULL;
   1645 	struct raid_softc *rs;
   1646 	int     unit;
   1647 	int s;
   1648 
        	/* s is only referenced by the commented-out spl calls in this
        	 * function */
   1649 	s=0;
   1650 	/* s = splbio();*/ /* want to test this */
   1651 	/* XXX along with the vnode, we also need the softc associated with
   1652 	 * this device.. */
   1653 
   1654 	req->queue = queue;
   1655 
   1656 	unit = queue->raidPtr->raidid;
   1657 
   1658 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1659 
   1660 	if (unit >= numraid) {
   1661 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1662 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1663 	}
   1664 	rs = &raid_softc[unit];
   1665 
   1666 	/* XXX is this the right place? */
   1667 	disk_busy(&rs->sc_dkdev);
   1668 
        	/* bp is the originating buffer carried by the request */
   1669 	bp = req->bp;
   1670 #if 1
   1671 	/* XXX when there is a physical disk failure, someone is passing us a
   1672 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1673 	 * without taking a performance hit... (not sure where the real bug
   1674 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1675 
   1676 	if (bp->b_flags & B_ERROR) {
   1677 		bp->b_flags &= ~B_ERROR;
   1678 	}
   1679 	if (bp->b_error != 0) {
   1680 		bp->b_error = 0;
   1681 	}
   1682 #endif
        	/* NOTE(review): raidbp is dereferenced below without a NULL check
        	 * -- confirm that RAIDGETBUF cannot fail. */
   1683 	raidbp = RAIDGETBUF(rs);
   1684 
   1685 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1686 
   1687 	/*
   1688 	 * context for raidiodone
   1689 	 */
   1690 	raidbp->rf_obp = bp;
   1691 	raidbp->req = req;
   1692 
   1693 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1694 
   1695 	switch (req->type) {
   1696 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1697 		/* XXX need to do something extra here.. */
   1698 		/* I'm leaving this in, as I've never actually seen it used,
   1699 		 * and I'd like folks to report it... GO */
   1700 		printf(("WAKEUP CALLED\n"));
   1701 		queue->numOutstanding++;
   1702 
   1703 		/* XXX need to glue the original buffer into this??  */
   1704 
        		/* the completion callback is invoked directly; InitBP() is
        		 * not called on this path */
   1705 		KernelWakeupFunc(&raidbp->rf_buf);
   1706 		break;
   1707 
   1708 	case RF_IO_TYPE_READ:
   1709 	case RF_IO_TYPE_WRITE:
   1710 
   1711 		if (req->tracerec) {
   1712 			RF_ETIMER_START(req->tracerec->timer);
   1713 		}
        		/* the originating buffer's flags are OR-ed with the
        		 * read/write bit to form the component buf's flags */
   1714 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1715 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1716 		    req->sectorOffset, req->numSector,
   1717 		    req->buf, KernelWakeupFunc, (void *) req,
   1718 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1719 
   1720 		if (rf_debugKernelAccess) {
        			/* note: this prints the originating buffer's b_blkno,
        			 * not the component buf's */
   1721 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1722 				(long) bp->b_blkno));
   1723 		}
   1724 		queue->numOutstanding++;
   1725 		queue->last_deq_sector = req->sectorOffset;
   1726 		/* acc wouldn't have been let in if there were any pending
   1727 		 * reqs at any other priority */
   1728 		queue->curPriority = req->priority;
   1729 
   1730 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1731 			req->type, unit, queue->row, queue->col));
   1732 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1733 			(int) req->sectorOffset, (int) req->numSector,
   1734 			(int) (req->numSector <<
   1735 			    queue->raidPtr->logBytesPerSector),
   1736 			(int) queue->raidPtr->logBytesPerSector));
        		/* for writes, bump the component vnode's v_numoutput before
        		 * issuing the I/O */
   1737 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1738 			raidbp->rf_buf.b_vp->v_numoutput++;
   1739 		}
   1740 		VOP_STRATEGY(&raidbp->rf_buf);
   1741 
   1742 		break;
   1743 
   1744 	default:
   1745 		panic("bad req->type in rf_DispatchKernelIO");
   1746 	}
   1747 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1748 	/* splx(s); */ /* want to test this */
   1749 	return (0);
   1750 }
   1751 /* this is the callback function associated with an I/O invoked from
   1752    kernel code.

           vbp is cast directly to a struct raidbuf *, from which the request
           (req), the originating buffer (rf_obp) and, through req->queue, the
           disk queue are recovered.  Error state, residual and byte count are
           copied from the wrapper's buf to the originating buffer.  If the
           originating buffer then has B_ERROR set and the component is still
           rf_ds_optimal, the component is marked rf_ds_failed, its row is
           marked rf_rs_degraded and numFailures is incremented.  The wrapper
           is handed back with RAIDPUTBUF, and finally rf_DiskIOComplete() and
           the request's CompleteFunc are called with an error indicator of
           1 or 0.  The whole body runs between splbio() and splx().

           NOTE(review): the cast assumes the struct buf is the first member
           of struct raidbuf -- confirm against its declaration.
   1753  */
   1754 static void
   1755 KernelWakeupFunc(vbp)
   1756 	struct buf *vbp;
   1757 {
   1758 	RF_DiskQueueData_t *req = NULL;
   1759 	RF_DiskQueue_t *queue;
   1760 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1761 	struct buf *bp;
   1762 	struct raid_softc *rs;
   1763 	int     unit;
   1764 	register int s;
   1765 
   1766 	s = splbio();
   1767 	db1_printf(("recovering the request queue:\n"));
   1768 	req = raidbp->req;
   1769 
        	/* bp is the originating buffer, not the component buf */
   1770 	bp = raidbp->rf_obp;
   1771 
   1772 	queue = (RF_DiskQueue_t *) req->queue;
   1773 
        	/* propagate a component error to the originating buffer, using
        	 * EIO when the component buf carries no specific error code */
   1774 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1775 		bp->b_flags |= B_ERROR;
   1776 		bp->b_error = raidbp->rf_buf.b_error ?
   1777 		    raidbp->rf_buf.b_error : EIO;
   1778 	}
   1779 
   1780 	/* XXX methinks this could be wrong... */
   1781 #if 1
   1782 	bp->b_resid = raidbp->rf_buf.b_resid;
   1783 #endif
   1784 
        	/* when tracing, add this I/O's elapsed time to the trace record's
        	 * diskwait_us and phys_io_us and count one more physical I/O */
   1785 	if (req->tracerec) {
   1786 		RF_ETIMER_STOP(req->tracerec->timer);
   1787 		RF_ETIMER_EVAL(req->tracerec->timer);
   1788 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1789 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1790 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1791 		req->tracerec->num_phys_ios++;
   1792 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1793 	}
   1794 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1795 
   1796 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1797 
   1798 
   1799 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1800 	 * ballistic, and mark the component as hosed... */
   1801 
   1802 	if (bp->b_flags & B_ERROR) {
   1803 		/* Mark the disk as dead */
   1804 		/* but only mark it once... */
   1805 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1806 		    rf_ds_optimal) {
   1807 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1808 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1809 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1810 			    rf_ds_failed;
   1811 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1812 			queue->raidPtr->numFailures++;
   1813 			/* XXX here we should bump the version number for each component, and write that data out */
   1814 		} else {	/* Disk is already dead... */
   1815 			/* printf("Disk already marked as dead!\n"); */
   1816 		}
   1817 
   1818 	}
   1819 
        	/* the wrapper is given back here; only req, bp and queue, which
        	 * were saved in locals above, are used from this point on */
   1820 	rs = &raid_softc[unit];
   1821 	RAIDPUTBUF(rs, raidbp);
   1822 
   1823 
        	/* NOTE(review): disk_busy() in rf_DispatchKernelIO is called
        	 * unconditionally, while disk_unbusy() here is only called when
        	 * the residual is zero -- confirm the two stay balanced.  Inside
        	 * this branch b_resid is 0, so the count passed is b_bcount. */
   1824 	if (bp->b_resid == 0) {
   1825 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1826 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1827 	}
   1828 
        	/* both completion hooks get 1 if the originating buffer has
        	 * B_ERROR set, 0 otherwise */
   1829 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1830 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1831 
   1832 	splx(s);
   1833 }
   1834 
   1835 
   1836 
   1837 /*
   1838  * initialize a buf structure for doing an I/O in the kernel.
   1839  */
   1840 static void
   1841 InitBP(
   1842     struct buf * bp,
   1843     struct vnode * b_vp,
   1844     unsigned rw_flag,
   1845     dev_t dev,
   1846     RF_SectorNum_t startSect,
   1847     RF_SectorCount_t numSect,
   1848     caddr_t buf,
   1849     void (*cbFunc) (struct buf *),
   1850     void *cbArg,
   1851     int logBytesPerSector,
   1852     struct proc * b_proc)
   1853 {
   1854 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1855 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1856 	bp->b_bcount = numSect << logBytesPerSector;
   1857 	bp->b_bufsize = bp->b_bcount;
   1858 	bp->b_error = 0;
   1859 	bp->b_dev = dev;
   1860 	bp->b_un.b_addr = buf;
   1861 	bp->b_blkno = startSect;
   1862 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1863 	if (bp->b_bcount == 0) {
   1864 		panic("bp->b_bcount is zero in InitBP!!\n");
   1865 	}
   1866 	bp->b_proc = b_proc;
   1867 	bp->b_iodone = cbFunc;
   1868 	bp->b_vp = b_vp;
   1869 
   1870 }
   1871 
   1872 static void
   1873 raidgetdefaultlabel(raidPtr, rs, lp)
   1874 	RF_Raid_t *raidPtr;
   1875 	struct raid_softc *rs;
   1876 	struct disklabel *lp;
   1877 {
   1878 	db1_printf(("Building a default label...\n"));
   1879 	bzero(lp, sizeof(*lp));
   1880 
   1881 	/* fabricate a label... */
   1882 	lp->d_secperunit = raidPtr->totalSectors;
   1883 	lp->d_secsize = raidPtr->bytesPerSector;
   1884 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1885 	lp->d_ntracks = 1;
   1886 	lp->d_ncylinders = raidPtr->totalSectors /
   1887 		(lp->d_nsectors * lp->d_ntracks);
   1888 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1889 
   1890 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1891 	lp->d_type = DTYPE_RAID;
   1892 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1893 	lp->d_rpm = 3600;
   1894 	lp->d_interleave = 1;
   1895 	lp->d_flags = 0;
   1896 
   1897 	lp->d_partitions[RAW_PART].p_offset = 0;
   1898 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1899 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1900 	lp->d_npartitions = RAW_PART + 1;
   1901 
   1902 	lp->d_magic = DISKMAGIC;
   1903 	lp->d_magic2 = DISKMAGIC;
   1904 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1905 
   1906 }
   1907 /*
   1908  * Read the disklabel from the raid device.  If one is not present, fake one
   1909  * up.
   1910  */
   1911 static void
   1912 raidgetdisklabel(dev)
   1913 	dev_t   dev;
   1914 {
   1915 	int     unit = raidunit(dev);
   1916 	struct raid_softc *rs = &raid_softc[unit];
   1917 	char   *errstring;
   1918 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1919 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1920 	RF_Raid_t *raidPtr;
   1921 
   1922 	db1_printf(("Getting the disklabel...\n"));
   1923 
   1924 	bzero(clp, sizeof(*clp));
   1925 
   1926 	raidPtr = raidPtrs[unit];
   1927 
   1928 	raidgetdefaultlabel(raidPtr, rs, lp);
   1929 
   1930 	/*
   1931 	 * Call the generic disklabel extraction routine.
   1932 	 */
   1933 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1934 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1935 	if (errstring)
   1936 		raidmakedisklabel(rs);
   1937 	else {
   1938 		int     i;
   1939 		struct partition *pp;
   1940 
   1941 		/*
   1942 		 * Sanity check whether the found disklabel is valid.
   1943 		 *
   1944 		 * This is necessary since total size of the raid device
   1945 		 * may vary when an interleave is changed even though exactly
   1946 		 * same componets are used, and old disklabel may used
   1947 		 * if that is found.
   1948 		 */
   1949 		if (lp->d_secperunit != rs->sc_size)
   1950 			printf("WARNING: %s: "
   1951 			    "total sector size in disklabel (%d) != "
   1952 			    "the size of raid (%ld)\n", rs->sc_xname,
   1953 			    lp->d_secperunit, (long) rs->sc_size);
   1954 		for (i = 0; i < lp->d_npartitions; i++) {
   1955 			pp = &lp->d_partitions[i];
   1956 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1957 				printf("WARNING: %s: end of partition `%c' "
   1958 				    "exceeds the size of raid (%ld)\n",
   1959 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1960 		}
   1961 	}
   1962 
   1963 }
   1964 /*
   1965  * Take care of things one might want to take care of in the event
   1966  * that a disklabel isn't present.
   1967  */
   1968 static void
   1969 raidmakedisklabel(rs)
   1970 	struct raid_softc *rs;
   1971 {
   1972 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1973 	db1_printf(("Making a label..\n"));
   1974 
   1975 	/*
   1976 	 * For historical reasons, if there's no disklabel present
   1977 	 * the raw partition must be marked FS_BSDFFS.
   1978 	 */
   1979 
   1980 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1981 
   1982 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1983 
   1984 	lp->d_checksum = dkcksum(lp);
   1985 }
   1986 /*
   1987  * Lookup the provided name in the filesystem.  If the file exists,
   1988  * is a valid block device, and isn't being used by anyone else,
   1989  * set *vpp to the file's vnode.
   1990  * You'll find the original of this in ccd.c
   1991  */
   1992 int
   1993 raidlookup(path, p, vpp)
   1994 	char   *path;
   1995 	struct proc *p;
   1996 	struct vnode **vpp;	/* result */
   1997 {
   1998 	struct nameidata nd;
   1999 	struct vnode *vp;
   2000 	struct vattr va;
   2001 	int     error;
   2002 
   2003 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2004 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2005 #ifdef DEBUG
   2006 		printf("RAIDframe: vn_open returned %d\n", error);
   2007 #endif
   2008 		return (error);
   2009 	}
   2010 	vp = nd.ni_vp;
   2011 	if (vp->v_usecount > 1) {
   2012 		VOP_UNLOCK(vp, 0);
   2013 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2014 		return (EBUSY);
   2015 	}
   2016 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2017 		VOP_UNLOCK(vp, 0);
   2018 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2019 		return (error);
   2020 	}
   2021 	/* XXX: eventually we should handle VREG, too. */
   2022 	if (va.va_type != VBLK) {
   2023 		VOP_UNLOCK(vp, 0);
   2024 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2025 		return (ENOTBLK);
   2026 	}
   2027 	VOP_UNLOCK(vp, 0);
   2028 	*vpp = vp;
   2029 	return (0);
   2030 }
   2031 /*
   2032  * Wait interruptibly for an exclusive lock.
   2033  *
   2034  * XXX
   2035  * Several drivers do this; it should be abstracted and made MP-safe.
   2036  * (Hmm... where have we seen this warning before :->  GO )
   2037  */
   2038 static int
   2039 raidlock(rs)
   2040 	struct raid_softc *rs;
   2041 {
   2042 	int     error;
   2043 
   2044 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2045 		rs->sc_flags |= RAIDF_WANTED;
   2046 		if ((error =
   2047 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2048 			return (error);
   2049 	}
   2050 	rs->sc_flags |= RAIDF_LOCKED;
   2051 	return (0);
   2052 }
   2053 /*
   2054  * Unlock and wake up any waiters.
   2055  */
   2056 static void
   2057 raidunlock(rs)
   2058 	struct raid_softc *rs;
   2059 {
   2060 
   2061 	rs->sc_flags &= ~RAIDF_LOCKED;
   2062 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2063 		rs->sc_flags &= ~RAIDF_WANTED;
   2064 		wakeup(rs);
   2065 	}
   2066 }
   2067 
   2068 
/*
 * Location and size of the component label area on each component.
 * raidread_component_label() and raidwrite_component_label() transfer
 * RF_COMPONENT_INFO_SIZE bytes starting at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE of the component device.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2071 
   2072 int
   2073 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2074 {
   2075 	RF_ComponentLabel_t clabel;
   2076 	raidread_component_label(dev, b_vp, &clabel);
   2077 	clabel.mod_counter = mod_counter;
   2078 	clabel.clean = RF_RAID_CLEAN;
   2079 	raidwrite_component_label(dev, b_vp, &clabel);
   2080 	return(0);
   2081 }
   2082 
   2083 
   2084 int
   2085 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2086 {
   2087 	RF_ComponentLabel_t clabel;
   2088 	raidread_component_label(dev, b_vp, &clabel);
   2089 	clabel.mod_counter = mod_counter;
   2090 	clabel.clean = RF_RAID_DIRTY;
   2091 	raidwrite_component_label(dev, b_vp, &clabel);
   2092 	return(0);
   2093 }
   2094 
   2095 /* ARGSUSED */
   2096 int
   2097 raidread_component_label(dev, b_vp, clabel)
   2098 	dev_t dev;
   2099 	struct vnode *b_vp;
   2100 	RF_ComponentLabel_t *clabel;
   2101 {
   2102 	struct buf *bp;
   2103 	int error;
   2104 
   2105 	/* XXX should probably ensure that we don't try to do this if
   2106 	   someone has changed rf_protected_sectors. */
   2107 
   2108 	/* get a block of the appropriate size... */
   2109 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2110 	bp->b_dev = dev;
   2111 
   2112 	/* get our ducks in a row for the read */
   2113 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2114 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2115 	bp->b_flags = B_BUSY | B_READ;
   2116  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2117 
   2118 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2119 
   2120 	error = biowait(bp);
   2121 
   2122 	if (!error) {
   2123 		memcpy(clabel, bp->b_un.b_addr,
   2124 		       sizeof(RF_ComponentLabel_t));
   2125 #if 0
   2126 		print_component_label( clabel );
   2127 #endif
   2128         } else {
   2129 #if 0
   2130 		printf("Failed to read RAID component label!\n");
   2131 #endif
   2132 	}
   2133 
   2134         bp->b_flags = B_INVAL | B_AGE;
   2135 	brelse(bp);
   2136 	return(error);
   2137 }
   2138 /* ARGSUSED */
   2139 int
   2140 raidwrite_component_label(dev, b_vp, clabel)
   2141 	dev_t dev;
   2142 	struct vnode *b_vp;
   2143 	RF_ComponentLabel_t *clabel;
   2144 {
   2145 	struct buf *bp;
   2146 	int error;
   2147 
   2148 	/* get a block of the appropriate size... */
   2149 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2150 	bp->b_dev = dev;
   2151 
   2152 	/* get our ducks in a row for the write */
   2153 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2154 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2155 	bp->b_flags = B_BUSY | B_WRITE;
   2156  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2157 
   2158 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2159 
   2160 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2161 
   2162 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2163 	error = biowait(bp);
   2164         bp->b_flags = B_INVAL | B_AGE;
   2165 	brelse(bp);
   2166 	if (error) {
   2167 #if 1
   2168 		printf("Failed to write RAID component info!\n");
   2169 #endif
   2170 	}
   2171 
   2172 	return(error);
   2173 }
   2174 
   2175 void
   2176 rf_markalldirty( raidPtr )
   2177 	RF_Raid_t *raidPtr;
   2178 {
   2179 	RF_ComponentLabel_t clabel;
   2180 	int r,c;
   2181 
   2182 	raidPtr->mod_counter++;
   2183 	for (r = 0; r < raidPtr->numRow; r++) {
   2184 		for (c = 0; c < raidPtr->numCol; c++) {
   2185 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2186 				raidread_component_label(
   2187 					raidPtr->Disks[r][c].dev,
   2188 					raidPtr->raid_cinfo[r][c].ci_vp,
   2189 					&clabel);
   2190 				if (clabel.status == rf_ds_spared) {
   2191 					/* XXX do something special...
   2192 					 but whatever you do, don't
   2193 					 try to access it!! */
   2194 				} else {
   2195 #if 0
   2196 				clabel.status =
   2197 					raidPtr->Disks[r][c].status;
   2198 				raidwrite_component_label(
   2199 					raidPtr->Disks[r][c].dev,
   2200 					raidPtr->raid_cinfo[r][c].ci_vp,
   2201 					&clabel);
   2202 #endif
   2203 				raidmarkdirty(
   2204 				       raidPtr->Disks[r][c].dev,
   2205 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2206 				       raidPtr->mod_counter);
   2207 				}
   2208 			}
   2209 		}
   2210 	}
   2211 	/* printf("Component labels marked dirty.\n"); */
   2212 #if 0
   2213 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2214 		sparecol = raidPtr->numCol + c;
   2215 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2216 			/*
   2217 
   2218 			   XXX this is where we get fancy and map this spare
   2219 			   into it's correct spot in the array.
   2220 
   2221 			 */
   2222 			/*
   2223 
   2224 			   we claim this disk is "optimal" if it's
   2225 			   rf_ds_used_spare, as that means it should be
   2226 			   directly substitutable for the disk it replaced.
   2227 			   We note that too...
   2228 
   2229 			 */
   2230 
   2231 			for(i=0;i<raidPtr->numRow;i++) {
   2232 				for(j=0;j<raidPtr->numCol;j++) {
   2233 					if ((raidPtr->Disks[i][j].spareRow ==
   2234 					     r) &&
   2235 					    (raidPtr->Disks[i][j].spareCol ==
   2236 					     sparecol)) {
   2237 						srow = r;
   2238 						scol = sparecol;
   2239 						break;
   2240 					}
   2241 				}
   2242 			}
   2243 
   2244 			raidread_component_label(
   2245 				      raidPtr->Disks[r][sparecol].dev,
   2246 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2247 				      &clabel);
   2248 			/* make sure status is noted */
   2249 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2250 			clabel.mod_counter = raidPtr->mod_counter;
   2251 			clabel.serial_number = raidPtr->serial_number;
   2252 			clabel.row = srow;
   2253 			clabel.column = scol;
   2254 			clabel.num_rows = raidPtr->numRow;
   2255 			clabel.num_columns = raidPtr->numCol;
   2256 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2257 			clabel.status = rf_ds_optimal;
   2258 			raidwrite_component_label(
   2259 				      raidPtr->Disks[r][sparecol].dev,
   2260 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2261 				      &clabel);
   2262 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2263 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2264 		}
   2265 	}
   2266 
   2267 #endif
   2268 }
   2269 
   2270 
   2271 void
   2272 rf_update_component_labels( raidPtr )
   2273 	RF_Raid_t *raidPtr;
   2274 {
   2275 	RF_ComponentLabel_t clabel;
   2276 	int sparecol;
   2277 	int r,c;
   2278 	int i,j;
   2279 	int srow, scol;
   2280 
   2281 	srow = -1;
   2282 	scol = -1;
   2283 
   2284 	/* XXX should do extra checks to make sure things really are clean,
   2285 	   rather than blindly setting the clean bit... */
   2286 
   2287 	raidPtr->mod_counter++;
   2288 
   2289 	for (r = 0; r < raidPtr->numRow; r++) {
   2290 		for (c = 0; c < raidPtr->numCol; c++) {
   2291 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2292 				raidread_component_label(
   2293 					raidPtr->Disks[r][c].dev,
   2294 					raidPtr->raid_cinfo[r][c].ci_vp,
   2295 					&clabel);
   2296 				/* make sure status is noted */
   2297 				clabel.status = rf_ds_optimal;
   2298 				raidwrite_component_label(
   2299 					raidPtr->Disks[r][c].dev,
   2300 					raidPtr->raid_cinfo[r][c].ci_vp,
   2301 					&clabel);
   2302 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2303 					raidmarkclean(
   2304 					      raidPtr->Disks[r][c].dev,
   2305 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2306 					      raidPtr->mod_counter);
   2307 				}
   2308 			}
   2309 			/* else we don't touch it.. */
   2310 #if 0
   2311 			else if (raidPtr->Disks[r][c].status !=
   2312 				   rf_ds_failed) {
   2313 				raidread_component_label(
   2314 					raidPtr->Disks[r][c].dev,
   2315 					raidPtr->raid_cinfo[r][c].ci_vp,
   2316 					&clabel);
   2317 				/* make sure status is noted */
   2318 				clabel.status =
   2319 					raidPtr->Disks[r][c].status;
   2320 				raidwrite_component_label(
   2321 					raidPtr->Disks[r][c].dev,
   2322 					raidPtr->raid_cinfo[r][c].ci_vp,
   2323 					&clabel);
   2324 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2325 					raidmarkclean(
   2326 					      raidPtr->Disks[r][c].dev,
   2327 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2328 					      raidPtr->mod_counter);
   2329 				}
   2330 			}
   2331 #endif
   2332 		}
   2333 	}
   2334 
   2335 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2336 		sparecol = raidPtr->numCol + c;
   2337 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2338 			/*
   2339 
   2340 			   we claim this disk is "optimal" if it's
   2341 			   rf_ds_used_spare, as that means it should be
   2342 			   directly substitutable for the disk it replaced.
   2343 			   We note that too...
   2344 
   2345 			 */
   2346 
   2347 			for(i=0;i<raidPtr->numRow;i++) {
   2348 				for(j=0;j<raidPtr->numCol;j++) {
   2349 					if ((raidPtr->Disks[i][j].spareRow ==
   2350 					     0) &&
   2351 					    (raidPtr->Disks[i][j].spareCol ==
   2352 					     sparecol)) {
   2353 						srow = i;
   2354 						scol = j;
   2355 						break;
   2356 					}
   2357 				}
   2358 			}
   2359 
   2360 			raidread_component_label(
   2361 				      raidPtr->Disks[0][sparecol].dev,
   2362 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2363 				      &clabel);
   2364 			/* make sure status is noted */
   2365 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2366 			clabel.mod_counter = raidPtr->mod_counter;
   2367 			clabel.serial_number = raidPtr->serial_number;
   2368 			clabel.row = srow;
   2369 			clabel.column = scol;
   2370 			clabel.num_rows = raidPtr->numRow;
   2371 			clabel.num_columns = raidPtr->numCol;
   2372 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2373 			clabel.status = rf_ds_optimal;
   2374 			raidwrite_component_label(
   2375 				      raidPtr->Disks[0][sparecol].dev,
   2376 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2377 				      &clabel);
   2378 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2379 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2380 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2381 					       raidPtr->mod_counter);
   2382 			}
   2383 		}
   2384 	}
   2385 	/* 	printf("Component labels updated\n"); */
   2386 }
   2387 
   2388 void
   2389 rf_ReconThread(req)
   2390 	struct rf_recon_req *req;
   2391 {
   2392 	int     s;
   2393 	RF_Raid_t *raidPtr;
   2394 
   2395 	s = splbio();
   2396 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2397 	raidPtr->recon_in_progress = 1;
   2398 
   2399 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2400 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2401 
   2402 	/* XXX get rid of this! we don't need it at all.. */
   2403 	RF_Free(req, sizeof(*req));
   2404 
   2405 	raidPtr->recon_in_progress = 0;
   2406 	splx(s);
   2407 
   2408 	/* That's all... */
   2409 	kthread_exit(0);        /* does not return */
   2410 }
   2411 
   2412 void
   2413 rf_RewriteParityThread(raidPtr)
   2414 	RF_Raid_t *raidPtr;
   2415 {
   2416 	int retcode;
   2417 	int s;
   2418 
   2419 	raidPtr->parity_rewrite_in_progress = 1;
   2420 	s = splbio();
   2421 	retcode = rf_RewriteParity(raidPtr);
   2422 	splx(s);
   2423 	if (retcode) {
   2424 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2425 	} else {
   2426 		/* set the clean bit!  If we shutdown correctly,
   2427 		   the clean bit on each component label will get
   2428 		   set */
   2429 		raidPtr->parity_good = RF_RAID_CLEAN;
   2430 	}
   2431 	raidPtr->parity_rewrite_in_progress = 0;
   2432 
   2433 	/* That's all... */
   2434 	kthread_exit(0);        /* does not return */
   2435 }
   2436 
   2437 
   2438 void
   2439 rf_CopybackThread(raidPtr)
   2440 	RF_Raid_t *raidPtr;
   2441 {
   2442 	int s;
   2443 
   2444 	raidPtr->copyback_in_progress = 1;
   2445 	s = splbio();
   2446 	rf_CopybackReconstructedData(raidPtr);
   2447 	splx(s);
   2448 	raidPtr->copyback_in_progress = 0;
   2449 
   2450 	/* That's all... */
   2451 	kthread_exit(0);        /* does not return */
   2452 }
   2453 
   2454 
   2455 void
   2456 rf_ReconstructInPlaceThread(req)
   2457 	struct rf_recon_req *req;
   2458 {
   2459 	int retcode;
   2460 	int s;
   2461 	RF_Raid_t *raidPtr;
   2462 
   2463 	s = splbio();
   2464 	raidPtr = req->raidPtr;
   2465 	raidPtr->recon_in_progress = 1;
   2466 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2467 	RF_Free(req, sizeof(*req));
   2468 	raidPtr->recon_in_progress = 0;
   2469 	splx(s);
   2470 
   2471 	/* That's all... */
   2472 	kthread_exit(0);        /* does not return */
   2473 }
   2474 
   2475 void
   2476 rf_mountroot_hook(dev)
   2477 	struct device *dev;
   2478 {
   2479 #if 1
   2480 	printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
   2481 #endif
   2482 	if (boothowto & RB_ASKNAME) {
   2483 		/* We don't auto-config... */
   2484 	} else {
   2485 		/* They didn't ask, and we found something bootable... */
   2486 		/* XXX pretend for now.. */
   2487 if (raidautoconfig) {
   2488 		rootspec = raid_rooty;
   2489 }
   2490 	}
   2491 }
   2492 
   2493 
   2494 RF_AutoConfig_t *
   2495 rf_find_raid_components()
   2496 {
   2497 	struct devnametobdevmaj *dtobdm;
   2498 	struct vnode *vp;
   2499 	struct disklabel label;
   2500 	struct device *dv;
   2501 	char *cd_name;
   2502 	dev_t dev;
   2503 	int error;
   2504 	int i;
   2505 	int good_one;
   2506 	RF_ComponentLabel_t *clabel;
   2507 	RF_AutoConfig_t *ac_list;
   2508 	RF_AutoConfig_t *ac;
   2509 
   2510 
   2511 	/* initialize the AutoConfig list */
   2512 	ac_list = NULL;
   2513 
   2514 if (raidautoconfig) {
   2515 
   2516 	/* we begin by trolling through *all* the devices on the system */
   2517 
   2518 	for (dv = alldevs.tqh_first; dv != NULL;
   2519 	     dv = dv->dv_list.tqe_next) {
   2520 
   2521 		/* we are only interested in disks... */
   2522 		if (dv->dv_class != DV_DISK)
   2523 			continue;
   2524 
   2525 		/* we don't care about floppies... */
   2526 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2527 			continue;
   2528 		}
   2529 
   2530 		/* need to find the device_name_to_block_device_major stuff */
   2531 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2532 		dtobdm = dev_name2blk;
   2533 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2534 			dtobdm++;
   2535 		}
   2536 
   2537 		/* get a vnode for the raw partition of this disk */
   2538 
   2539 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2540 		if (bdevvp(dev, &vp))
   2541 			panic("RAID can't alloc vnode");
   2542 
   2543 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2544 
   2545 		if (error) {
   2546 			/* "Who cares."  Continue looking
   2547 			   for something that exists*/
   2548 			vput(vp);
   2549 			continue;
   2550 		}
   2551 
   2552 		/* Ok, the disk exists.  Go get the disklabel. */
   2553 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2554 				  FREAD, NOCRED, 0);
   2555 		if (error) {
   2556 			/*
   2557 			 * XXX can't happen - open() would
   2558 			 * have errored out (or faked up one)
   2559 			 */
   2560 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2561 			       dv->dv_xname, 'a' + RAW_PART, error);
   2562 		}
   2563 
   2564 		/* don't need this any more.  We'll allocate it again
   2565 		   a little later if we really do... */
   2566 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2567 		vput(vp);
   2568 
   2569 		for (i=0; i < label.d_npartitions; i++) {
   2570 			/* We only support partitions marked as RAID */
   2571 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2572 				continue;
   2573 
   2574 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2575 			if (bdevvp(dev, &vp))
   2576 				panic("RAID can't alloc vnode");
   2577 
   2578 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2579 			if (error) {
   2580 				/* Whatever... */
   2581 				vput(vp);
   2582 				continue;
   2583 			}
   2584 
   2585 			good_one = 0;
   2586 
   2587 			clabel = (RF_ComponentLabel_t *)
   2588 				malloc(sizeof(RF_ComponentLabel_t),
   2589 				       M_RAIDFRAME, M_NOWAIT);
   2590 			if (clabel == NULL) {
   2591 				/* XXX CLEANUP HERE */
   2592 				printf("RAID auto config: out of memory!\n");
   2593 				return(NULL); /* XXX probably should panic? */
   2594 			}
   2595 
   2596 			if (!raidread_component_label(dev, vp, clabel)) {
   2597 				/* Got the label.  Does it look reasonable? */
   2598 				if (rf_reasonable_label(clabel) &&
   2599 				    (clabel->partitionSize ==
   2600 				     label.d_partitions[i].p_size)) {
   2601 #if DEBUG
   2602 					printf("Component on: %s%c: %d\n",
   2603 					       dv->dv_xname, 'a'+i,
   2604 					       label.d_partitions[i].p_size);
   2605 					print_component_label(clabel);
   2606 #endif
   2607 					/* if it's reasonable, add it,
   2608 					   else ignore it. */
   2609 					ac = (RF_AutoConfig_t *)
   2610 						malloc(sizeof(RF_AutoConfig_t),
   2611 						       M_RAIDFRAME,
   2612 						       M_NOWAIT);
   2613 					if (ac == NULL) {
   2614 						/* XXX should panic?? */
   2615 						return(NULL);
   2616 					}
   2617 
   2618 					sprintf(ac->devname, "%s%c",
   2619 						dv->dv_xname, 'a'+i);
   2620 					ac->dev = dev;
   2621 					ac->vp = vp;
   2622 					ac->clabel = clabel;
   2623 					ac->next = ac_list;
   2624 					ac_list = ac;
   2625 					good_one = 1;
   2626 				}
   2627 			}
   2628 			if (!good_one) {
   2629 				/* cleanup */
   2630 				free(clabel, M_RAIDFRAME);
   2631 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2632 				vput(vp);
   2633 			}
   2634 		}
   2635 	}
   2636 }
   2637 return(ac_list);
   2638 }
   2639 
   2640 static int
   2641 rf_reasonable_label(clabel)
   2642 	RF_ComponentLabel_t *clabel;
   2643 {
   2644 
   2645 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2646 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2647 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2648 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2649 	    clabel->row >=0 &&
   2650 	    clabel->column >= 0 &&
   2651 	    clabel->num_rows > 0 &&
   2652 	    clabel->num_columns > 0 &&
   2653 	    clabel->row < clabel->num_rows &&
   2654 	    clabel->column < clabel->num_columns &&
   2655 	    clabel->blockSize > 0 &&
   2656 	    clabel->numBlocks > 0) {
   2657 		/* label looks reasonable enough... */
   2658 		return(1);
   2659 	}
   2660 	return(0);
   2661 }
   2662 
   2663 
   2664 void
   2665 print_component_label(clabel)
   2666 	RF_ComponentLabel_t *clabel;
   2667 {
   2668 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2669 	       clabel->row, clabel->column,
   2670 	       clabel->num_rows, clabel->num_columns);
   2671 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2672 	       clabel->version, clabel->serial_number,
   2673 	       clabel->mod_counter);
   2674 	printf("   Clean: %s Status: %d\n",
   2675 	       clabel->clean ? "Yes" : "No", clabel->status );
   2676 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2677 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2678 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2679 	       (char) clabel->parityConfig, clabel->blockSize,
   2680 	       clabel->numBlocks);
   2681 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2682 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2683 #if 0
   2684 	   printf("   Config order: %d\n", clabel->config_order);
   2685 #endif
   2686 
   2687 }
   2688 
   2689 RF_ConfigSet_t *
   2690 rf_create_auto_sets(ac_list)
   2691 	RF_AutoConfig_t *ac_list;
   2692 {
   2693 	RF_AutoConfig_t *ac;
   2694 	RF_ConfigSet_t *config_sets;
   2695 	RF_ConfigSet_t *cset;
   2696 	RF_AutoConfig_t *ac_next;
   2697 
   2698 
   2699 	config_sets = NULL;
   2700 
   2701 	/* Go through the AutoConfig list, and figure out which components
   2702 	   belong to what sets.  */
   2703 	ac = ac_list;
   2704 	while(ac!=NULL) {
   2705 		/* we're going to putz with ac->next, so save it here
   2706 		   for use at the end of the loop */
   2707 		ac_next = ac->next;
   2708 
   2709 		if (config_sets == NULL) {
   2710 			/* will need at least this one... */
   2711 			config_sets = (RF_ConfigSet_t *)
   2712 				malloc(sizeof(RF_ConfigSet_t),
   2713 				       M_RAIDFRAME, M_NOWAIT);
   2714 			if (config_sets == NULL) {
   2715 				panic("rf_create_auto_sets: No memory!\n");
   2716 			}
   2717 			/* this one is easy :) */
   2718 			config_sets->ac = ac;
   2719 			config_sets->next = NULL;
   2720 			config_sets->rootable = 0;
   2721 			ac->next = NULL;
   2722 		} else {
   2723 			/* which set does this component fit into? */
   2724 			cset = config_sets;
   2725 			while(cset!=NULL) {
   2726 				if (rf_does_it_fit(cset, ac)) {
   2727 					/* looks like it matches */
   2728 					ac->next = cset->ac;
   2729 					cset->ac = ac;
   2730 					break;
   2731 				}
   2732 				cset = cset->next;
   2733 			}
   2734 			if (cset==NULL) {
   2735 				/* didn't find a match above... new set..*/
   2736 				cset = (RF_ConfigSet_t *)
   2737 					malloc(sizeof(RF_ConfigSet_t),
   2738 					       M_RAIDFRAME, M_NOWAIT);
   2739 				if (cset == NULL) {
   2740 					panic("rf_create_auto_sets: No memory!\n");
   2741 				}
   2742 				cset->ac = ac;
   2743 				ac->next = NULL;
   2744 				cset->next = config_sets;
   2745 				cset->rootable = 0;
   2746 				config_sets = cset;
   2747 			}
   2748 		}
   2749 		ac = ac_next;
   2750 	}
   2751 
   2752 
   2753 	return(config_sets);
   2754 }
   2755 
   2756 static int
   2757 rf_does_it_fit(cset, ac)
   2758 	RF_ConfigSet_t *cset;
   2759 	RF_AutoConfig_t *ac;
   2760 {
   2761 	RF_ComponentLabel_t *clabel1, *clabel2;
   2762 
   2763 	/* If this one matches the *first* one in the set, that's good
   2764 	   enough, since the other members of the set would have been
   2765 	   through here too... */
   2766 
   2767 	clabel1 = cset->ac->clabel;
   2768 	clabel2 = ac->clabel;
   2769 	if ((clabel1->version == clabel2->version) &&
   2770 	    (clabel1->serial_number == clabel2->serial_number) &&
   2771 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2772 	    (clabel1->num_rows == clabel2->num_rows) &&
   2773 	    (clabel1->num_columns == clabel2->num_columns) &&
   2774 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2775 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2776 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2777 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2778 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2779 	    (clabel1->blockSize == clabel2->blockSize) &&
   2780 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2781 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2782 	    (clabel1->root_partition == clabel2->root_partition) &&
   2783 	    (clabel1->last_unit == clabel2->last_unit) &&
   2784 	    (clabel1->config_order == clabel2->config_order)) {
   2785 		/* if it get's here, it almost *has* to be a match */
   2786 	} else {
   2787 		/* it's not consistent with somebody in the set..
   2788 		   punt */
   2789 		return(0);
   2790 	}
   2791 	/* all was fine.. it must fit... */
   2792 	return(1);
   2793 }
   2794 
   2795 int
   2796 rf_have_enough_components(cset)
   2797 	RF_ConfigSet_t *cset;
   2798 {
   2799 	RF_AutoConfig_t *ac;
   2800 	RF_AutoConfig_t *auto_config;
   2801 	RF_ComponentLabel_t *clabel;
   2802 	int r,c;
   2803 	int num_rows;
   2804 	int num_cols;
   2805 	int num_missing;
   2806 
   2807 	/* check to see that we have enough 'live' components
   2808 	   of this set.  If so, we can configure it if necessary */
   2809 
   2810 	num_rows = cset->ac->clabel->num_rows;
   2811 	num_cols = cset->ac->clabel->num_columns;
   2812 
   2813 	/* XXX Check for duplicate components!?!?!? */
   2814 
   2815 	num_missing = 0;
   2816 	auto_config = cset->ac;
   2817 
   2818 	for(r=0; r<num_rows; r++) {
   2819 		for(c=0; c<num_cols; c++) {
   2820 			ac = auto_config;
   2821 			while(ac!=NULL) {
   2822 				if (ac->clabel==NULL) {
   2823 					/* big-time bad news. */
   2824 					goto fail;
   2825 				}
   2826 				if ((ac->clabel->row == r) &&
   2827 				    (ac->clabel->column == c)) {
   2828 					/* it's this one... */
   2829 #if DEBUG
   2830 					printf("Found: %s at %d,%d\n",
   2831 					       ac->devname,r,c);
   2832 #endif
   2833 					break;
   2834 				}
   2835 				ac=ac->next;
   2836 			}
   2837 			if (ac==NULL) {
   2838 				/* Didn't find one here! */
   2839 				num_missing++;
   2840 			}
   2841 		}
   2842 	}
   2843 
   2844 	clabel = cset->ac->clabel;
   2845 
   2846 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2847 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2848 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2849 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2850 		/* XXX this needs to be made *much* more general */
   2851 		/* Too many failures */
   2852 		return(0);
   2853 	}
   2854 	/* otherwise, all is well, and we've got enough to take a kick
   2855 	   at autoconfiguring this set */
   2856 	return(1);
   2857 fail:
   2858 	return(0);
   2859 
   2860 }
   2861 
   2862 void
   2863 rf_create_configuration(ac,config,raidPtr)
   2864 	RF_AutoConfig_t *ac;
   2865 	RF_Config_t *config;
   2866 	RF_Raid_t *raidPtr;
   2867 {
   2868 	RF_ComponentLabel_t *clabel;
   2869 
   2870 	clabel = ac->clabel;
   2871 
   2872 	/* 1. Fill in the common stuff */
   2873 	config->numRow = clabel->num_rows;
   2874 	config->numCol = clabel->num_columns;
   2875 	config->numSpare = 0; /* XXX should this be set here? */
   2876 	config->sectPerSU = clabel->sectPerSU;
   2877 	config->SUsPerPU = clabel->SUsPerPU;
   2878 	config->SUsPerRU = clabel->SUsPerRU;
   2879 	config->parityConfig = clabel->parityConfig;
   2880 	/* XXX... */
   2881 	strcpy(config->diskQueueType,"fifo");
   2882 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2883 	config->layoutSpecificSize = 0; /* XXX ?? */
   2884 
   2885 	while(ac!=NULL) {
   2886 		/* row/col values will be in range due to the checks
   2887 		   in reasonable_label() */
   2888 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2889 		       ac->devname);
   2890 		ac = ac->next;
   2891 	}
   2892 
   2893 }
   2894 
   2895 int
   2896 rf_set_autoconfig(raidPtr, new_value)
   2897 	RF_Raid_t *raidPtr;
   2898 	int new_value;
   2899 {
   2900 	RF_ComponentLabel_t clabel;
   2901 	struct vnode *vp;
   2902 	dev_t dev;
   2903 	int row, column;
   2904 
   2905 	for(row=0; row<raidPtr->numRow; row++) {
   2906 		for(column=0; column<raidPtr->numCol; column++) {
   2907 			dev = raidPtr->Disks[row][column].dev;
   2908 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2909 			raidread_component_label(dev, vp, &clabel);
   2910 			clabel.autoconfigure = new_value;
   2911 			raidwrite_component_label(dev, vp, &clabel);
   2912 		}
   2913 	}
   2914 	return(new_value);
   2915 }
   2916 
   2917 int
   2918 rf_set_rootpartition(raidPtr, new_value)
   2919 	RF_Raid_t *raidPtr;
   2920 	int new_value;
   2921 {
   2922 	RF_ComponentLabel_t clabel;
   2923 	struct vnode *vp;
   2924 	dev_t dev;
   2925 	int row, column;
   2926 
   2927 	for(row=0; row<raidPtr->numRow; row++) {
   2928 		for(column=0; column<raidPtr->numCol; column++) {
   2929 			dev = raidPtr->Disks[row][column].dev;
   2930 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2931 			raidread_component_label(dev, vp, &clabel);
   2932 			clabel.root_partition = new_value;
   2933 			raidwrite_component_label(dev, vp, &clabel);
   2934 		}
   2935 	}
   2936 	return(new_value);
   2937 }
   2938 
   2939 void
   2940 rf_release_all_vps(cset)
   2941 	RF_ConfigSet_t *cset;
   2942 {
   2943 	RF_AutoConfig_t *ac;
   2944 
   2945 	ac = cset->ac;
   2946 	while(ac!=NULL) {
   2947 		/* Close the vp, and give it back */
   2948 		if (ac->vp) {
   2949 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2950 			vput(ac->vp);
   2951 		}
   2952 		ac = ac->next;
   2953 	}
   2954 }
   2955 
   2956 
   2957 void
   2958 rf_cleanup_config_set(cset)
   2959 	RF_ConfigSet_t *cset;
   2960 {
   2961 	RF_AutoConfig_t *ac;
   2962 	RF_AutoConfig_t *next_ac;
   2963 
   2964 	ac = cset->ac;
   2965 	while(ac!=NULL) {
   2966 		next_ac = ac->next;
   2967 		/* nuke the label */
   2968 		free(ac->clabel, M_RAIDFRAME);
   2969 		/* cleanup the config structure */
   2970 		free(ac, M_RAIDFRAME);
   2971 		/* "next.." */
   2972 		ac = next_ac;
   2973 	}
   2974 	/* and, finally, nuke the config set */
   2975 	free(cset, M_RAIDFRAME);
   2976 }
   2977 
   2978 
   2979 void
   2980 raid_init_component_label(raidPtr, clabel)
   2981 	RF_Raid_t *raidPtr;
   2982 	RF_ComponentLabel_t *clabel;
   2983 {
   2984 	/* current version number */
   2985 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   2986 	clabel->serial_number = clabel->serial_number;
   2987 	clabel->mod_counter = raidPtr->mod_counter;
   2988 	clabel->num_rows = raidPtr->numRow;
   2989 	clabel->num_columns = raidPtr->numCol;
   2990 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   2991 	clabel->status = rf_ds_optimal; /* "It's good!" */
   2992 
   2993 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   2994 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   2995 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   2996 	/* XXX not portable */
   2997 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   2998 	/* XXX THIS SHOULD BE SET RIGHT!! */
   2999 	clabel->maxOutstanding = 100;
   3000 	clabel->autoconfigure = 0;
   3001 	clabel->root_partition = 0;
   3002 	clabel->last_unit = raidPtr->raidid;
   3003 	clabel->config_order = 0;
   3004 }
   3005 
   3006 int
   3007 rf_auto_config_set(cset,unit)
   3008 	RF_ConfigSet_t *cset;
   3009 	int *unit;
   3010 {
   3011 	RF_Raid_t *raidPtr;
   3012 	RF_Config_t *config;
   3013 	int raidID;
   3014 	int retcode;
   3015 
   3016 	printf("Starting autoconfigure on raid%d\n",raidID);
   3017 
   3018 	retcode = 0;
   3019 	*unit = -1;
   3020 
   3021 	/* 1. Create a config structure */
   3022 
   3023 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3024 				       M_RAIDFRAME,
   3025 				       M_NOWAIT);
   3026 	if (config==NULL) {
   3027 		printf("Out of mem!?!?\n");
   3028 				/* XXX do something more intelligent here. */
   3029 		return(1);
   3030 	}
   3031 	/* XXX raidID needs to be set correctly.. */
   3032 
   3033 	/*
   3034 	   2. Figure out what RAID ID this one is supposed to live at
   3035 	   See if we can get the same RAID dev that it was configured
   3036 	   on last time..
   3037 	*/
   3038 
   3039 	raidID = cset->ac->clabel->last_unit;
   3040 	if ((raidID < 0) || (raidID >= numraid)) {
   3041 		/* let's not wander off into lala land. */
   3042 		raidID = numraid - 1;
   3043 	}
   3044 	if (raidPtrs[raidID]->valid != 0) {
   3045 
   3046 		/*
   3047 		   Nope... Go looking for an alternative...
   3048 		   Start high so we don't immediately use raid0 if that's
   3049 		   not taken.
   3050 		*/
   3051 
   3052 		for(raidID = numraid; raidID >= 0; raidID--) {
   3053 			if (raidPtrs[raidID]->valid == 0) {
   3054 				/* can use this one! */
   3055 				break;
   3056 			}
   3057 		}
   3058 	}
   3059 
   3060 	if (raidID < 0) {
   3061 		/* punt... */
   3062 		printf("Unable to auto configure this set!\n");
   3063 		printf("(Out of RAID devs!)\n");
   3064 		return(1);
   3065 	}
   3066 
   3067 	raidPtr = raidPtrs[raidID];
   3068 
   3069 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3070 	raidPtr->raidid = raidID;
   3071 	raidPtr->openings = RAIDOUTSTANDING;
   3072 
   3073 	/* 3. Build the configuration structure */
   3074 	rf_create_configuration(cset->ac, config, raidPtr);
   3075 
   3076 	/* 4. Do the configuration */
   3077 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3078 
   3079 	if (retcode == 0) {
   3080 #if DEBUG
   3081 		printf("Calling raidinit()\n");
   3082 #endif
   3083 				/* XXX the 0 below is bogus! */
   3084 		retcode = raidinit(0, raidPtrs[raidID], raidID);
   3085 		if (retcode) {
   3086 			printf("init returned: %d\n",retcode);
   3087 		}
   3088 		rf_markalldirty( raidPtrs[raidID] );
   3089 		if (cset->ac->clabel->root_partition==1) {
   3090 			/* everything configured just fine.  Make a note
   3091 			   that this set is eligible to be root. */
   3092 			cset->rootable = 1;
   3093 		}
   3094 	}
   3095 
   3096 	/* 5. Cleanup */
   3097 	free(config, M_RAIDFRAME);
   3098 
   3099 	*unit = raidID;
   3100 	return(retcode);
   3101 }
   3102