Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.117.6.2
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.117.6.2 2002/05/30 14:47:03 gehenna Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_netbsdkintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.2 2002/05/30 14:47:03 gehenna Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_acctrace.h"
    149 #include "rf_etimer.h"
    150 #include "rf_general.h"
    151 #include "rf_debugMem.h"
    152 #include "rf_kintf.h"
    153 #include "rf_options.h"
    154 #include "rf_driver.h"
    155 #include "rf_parityscan.h"
    156 #include "rf_debugprint.h"
    157 #include "rf_threadstuff.h"
    158 
/*
 * Debug verbosity for this file.  db1_printf() output is produced only when
 * the kernel is compiled with DEBUG and rf_kdebug_level is greater than zero.
 */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- conditional debug printf.
 *
 * The single macro argument is a complete, parenthesized printf argument
 * list.  Both variants expand to one do { } while (0) statement so that the
 * macro can be used as the body of an unbraced if/else: it never captures a
 * following "else" and it always consumes the trailing semicolon.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    166 
    167 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    168 
    169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    170 
    171 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    172 						 * spare table */
    173 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    174 						 * installation process */
    175 
    176 /* prototypes */
    177 static void KernelWakeupFunc(struct buf * bp);
    178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    179 		   dev_t dev, RF_SectorNum_t startSect,
    180 		   RF_SectorCount_t numSect, caddr_t buf,
    181 		   void (*cbFunc) (struct buf *), void *cbArg,
    182 		   int logBytesPerSector, struct proc * b_proc);
    183 static void raidinit(RF_Raid_t *);
    184 
    185 void raidattach(int);
    186 
    187 dev_type_open(raidopen);
    188 dev_type_close(raidclose);
    189 dev_type_read(raidread);
    190 dev_type_write(raidwrite);
    191 dev_type_ioctl(raidioctl);
    192 dev_type_strategy(raidstrategy);
    193 dev_type_dump(raiddump);
    194 dev_type_size(raidsize);
    195 
    196 const struct bdevsw raid_bdevsw = {
    197 	raidopen, raidclose, raidstrategy, raidioctl,
    198 	raiddump, raidsize, D_DISK
    199 };
    200 
    201 const struct cdevsw raid_cdevsw = {
    202 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    203 	nostop, notty, nopoll, nommap, D_DISK
    204 };
    205 
    206 /*
    207  * Pilfered from ccd.c
    208  */
    209 
    210 struct raidbuf {
    211 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    212 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    213 	int     rf_flags;	/* misc. flags */
    214 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    215 };
    216 
    217 /* component buffer pool */
    218 struct pool raidframe_cbufpool;
    219 
    220 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    221 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    222 
    223 /* XXX Not sure if the following should be replacing the raidPtrs above,
    224    or if it should be used in conjunction with that...
    225 */
    226 
    227 struct raid_softc {
    228 	int     sc_flags;	/* flags */
    229 	int     sc_cflags;	/* configuration flags */
    230 	size_t  sc_size;        /* size of the raid device */
    231 	char    sc_xname[20];	/* XXX external name */
    232 	struct disk sc_dkdev;	/* generic disk device info */
    233 	struct buf_queue buf_queue;	/* used for the device queue */
    234 };
/* Bits stored in raid_softc.sc_flags. */
#define RAIDF_INITED	(1 << 0)	/* 0x01: unit has been initialized */
#define RAIDF_WLABEL	(1 << 1)	/* 0x02: label area is writable */
#define RAIDF_LABELLING	(1 << 2)	/* 0x04: unit is currently being labelled */
#define RAIDF_WANTED	(1 << 6)	/* 0x40: someone is waiting to obtain a lock */
#define RAIDF_LOCKED	(1 << 7)	/* 0x80: unit is locked */

/* Map a dev_t to its raid unit number. */
#define	raidunit(x)	DISKUNIT(x)

/* Number of raid units; set by raidattach(). */
int numraid = 0;
    244 
    245 /*
    246  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    247  * Be aware that large numbers can allow the driver to consume a lot of
    248  * kernel memory, especially on writes, and in degraded mode reads.
    249  *
    250  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    251  * a single 64K write will typically require 64K for the old data,
    252  * 64K for the old parity, and 64K for the new parity, for a total
    253  * of 192K (if the parity buffer is not re-used immediately).
    254  * Even if it is used immediately, that's still 128K, which when multiplied
    255  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    256  *
    257  * Now in degraded mode, for example, a 64K read on the above setup may
    258  * require data reconstruction, which will require *all* of the 4 remaining
    259  * disks to participate -- 4 * 32K/disk == 128K again.
    260  */
    261 
/* Default number of simultaneous I/Os per RAID device; may be overridden. */
#if !defined(RAIDOUTSTANDING)
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition of the raid unit that "dev" belongs to. */
#define RAIDLABELDEV(dev)						\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    268 
    269 /* declared here, and made public, for the benefit of KVM stuff.. */
    270 struct raid_softc *raid_softc;
    271 
    272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    273 				     struct disklabel *);
    274 static void raidgetdisklabel(dev_t);
    275 static void raidmakedisklabel(struct raid_softc *);
    276 
    277 static int raidlock(struct raid_softc *);
    278 static void raidunlock(struct raid_softc *);
    279 
    280 static void rf_markalldirty(RF_Raid_t *);
    281 void rf_mountroot_hook(struct device *);
    282 
    283 struct device *raidrootdev;
    284 
    285 void rf_ReconThread(struct rf_recon_req *);
    286 /* XXX what I want is: */
    287 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    288 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    289 void rf_CopybackThread(RF_Raid_t *raidPtr);
    290 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    291 void rf_buildroothack(void *);
    292 
    293 RF_AutoConfig_t *rf_find_raid_components(void);
    294 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    295 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    296 static int rf_reasonable_label(RF_ComponentLabel_t *);
    297 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    298 int rf_set_autoconfig(RF_Raid_t *, int);
    299 int rf_set_rootpartition(RF_Raid_t *, int);
    300 void rf_release_all_vps(RF_ConfigSet_t *);
    301 void rf_cleanup_config_set(RF_ConfigSet_t *);
    302 int rf_have_enough_components(RF_ConfigSet_t *);
    303 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    304 
    305 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    306 				  allow autoconfig to take place.
    307 			          Note that this is overridden by having
    308 			          RAID_AUTOCONFIG as an option in the
    309 			          kernel config file.  */
    310 
    311 void
    312 raidattach(num)
    313 	int     num;
    314 {
    315 	int raidID;
    316 	int i, rc;
    317 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    318 	RF_ConfigSet_t *config_sets;
    319 
    320 #ifdef DEBUG
    321 	printf("raidattach: Asked for %d units\n", num);
    322 #endif
    323 
    324 	if (num <= 0) {
    325 #ifdef DIAGNOSTIC
    326 		panic("raidattach: count <= 0");
    327 #endif
    328 		return;
    329 	}
    330 	/* This is where all the initialization stuff gets done. */
    331 
    332 	numraid = num;
    333 
    334 	/* Make some space for requested number of units... */
    335 
    336 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    337 	if (raidPtrs == NULL) {
    338 		panic("raidPtrs is NULL!!\n");
    339 	}
    340 
    341 	/* Initialize the component buffer pool. */
    342 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    343 	    0, 0, "raidpl", NULL);
    344 
    345 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    346 	if (rc) {
    347 		RF_PANIC();
    348 	}
    349 
    350 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    351 
    352 	for (i = 0; i < num; i++)
    353 		raidPtrs[i] = NULL;
    354 	rc = rf_BootRaidframe();
    355 	if (rc == 0)
    356 		printf("Kernelized RAIDframe activated\n");
    357 	else
    358 		panic("Serious error booting RAID!!\n");
    359 
    360 	/* put together some datastructures like the CCD device does.. This
    361 	 * lets us lock the device and what-not when it gets opened. */
    362 
    363 	raid_softc = (struct raid_softc *)
    364 		malloc(num * sizeof(struct raid_softc),
    365 		       M_RAIDFRAME, M_NOWAIT);
    366 	if (raid_softc == NULL) {
    367 		printf("WARNING: no memory for RAIDframe driver\n");
    368 		return;
    369 	}
    370 
    371 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    372 
    373 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    374 					      M_RAIDFRAME, M_NOWAIT);
    375 	if (raidrootdev == NULL) {
    376 		panic("No memory for RAIDframe driver!!?!?!\n");
    377 	}
    378 
    379 	for (raidID = 0; raidID < num; raidID++) {
    380 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    381 
    382 		raidrootdev[raidID].dv_class  = DV_DISK;
    383 		raidrootdev[raidID].dv_cfdata = NULL;
    384 		raidrootdev[raidID].dv_unit   = raidID;
    385 		raidrootdev[raidID].dv_parent = NULL;
    386 		raidrootdev[raidID].dv_flags  = 0;
    387 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    388 
    389 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    390 			  (RF_Raid_t *));
    391 		if (raidPtrs[raidID] == NULL) {
    392 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    393 			numraid = raidID;
    394 			return;
    395 		}
    396 	}
    397 
    398 #ifdef RAID_AUTOCONFIG
    399 	raidautoconfig = 1;
    400 #endif
    401 
    402 if (raidautoconfig) {
    403 	/* 1. locate all RAID components on the system */
    404 
    405 #if DEBUG
    406 	printf("Searching for raid components...\n");
    407 #endif
    408 	ac_list = rf_find_raid_components();
    409 
    410 	/* 2. sort them into their respective sets */
    411 
    412 	config_sets = rf_create_auto_sets(ac_list);
    413 
    414 	/* 3. evaluate each set and configure the valid ones
    415 	   This gets done in rf_buildroothack() */
    416 
    417 	/* schedule the creation of the thread to do the
    418 	   "/ on RAID" stuff */
    419 
    420 	kthread_create(rf_buildroothack,config_sets);
    421 
    422 #if 0
    423 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    424 #endif
    425 }
    426 
    427 }
    428 
    429 void
    430 rf_buildroothack(arg)
    431 	void *arg;
    432 {
    433 	RF_ConfigSet_t *config_sets = arg;
    434 	RF_ConfigSet_t *cset;
    435 	RF_ConfigSet_t *next_cset;
    436 	int retcode;
    437 	int raidID;
    438 	int rootID;
    439 	int num_root;
    440 
    441 	rootID = 0;
    442 	num_root = 0;
    443 	cset = config_sets;
    444 	while(cset != NULL ) {
    445 		next_cset = cset->next;
    446 		if (rf_have_enough_components(cset) &&
    447 		    cset->ac->clabel->autoconfigure==1) {
    448 			retcode = rf_auto_config_set(cset,&raidID);
    449 			if (!retcode) {
    450 				if (cset->rootable) {
    451 					rootID = raidID;
    452 					num_root++;
    453 				}
    454 			} else {
    455 				/* The autoconfig didn't work :( */
    456 #if DEBUG
    457 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    458 #endif
    459 				rf_release_all_vps(cset);
    460 			}
    461 		} else {
    462 			/* we're not autoconfiguring this set...
    463 			   release the associated resources */
    464 			rf_release_all_vps(cset);
    465 		}
    466 		/* cleanup */
    467 		rf_cleanup_config_set(cset);
    468 		cset = next_cset;
    469 	}
    470 	if (boothowto & RB_ASKNAME) {
    471 		/* We don't auto-config... */
    472 	} else {
    473 		/* They didn't ask, and we found something bootable... */
    474 
    475 		if (num_root == 1) {
    476 			booted_device = &raidrootdev[rootID];
    477 		} else if (num_root > 1) {
    478 			/* we can't guess.. require the user to answer... */
    479 			boothowto |= RB_ASKNAME;
    480 		}
    481 	}
    482 }
    483 
    484 
    485 int
    486 raidsize(dev)
    487 	dev_t   dev;
    488 {
    489 	struct raid_softc *rs;
    490 	struct disklabel *lp;
    491 	int     part, unit, omask, size;
    492 
    493 	unit = raidunit(dev);
    494 	if (unit >= numraid)
    495 		return (-1);
    496 	rs = &raid_softc[unit];
    497 
    498 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    499 		return (-1);
    500 
    501 	part = DISKPART(dev);
    502 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    503 	lp = rs->sc_dkdev.dk_label;
    504 
    505 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    506 		return (-1);
    507 
    508 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    509 		size = -1;
    510 	else
    511 		size = lp->d_partitions[part].p_size *
    512 		    (lp->d_secsize / DEV_BSIZE);
    513 
    514 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    515 		return (-1);
    516 
    517 	return (size);
    518 
    519 }
    520 
    521 int
    522 raiddump(dev, blkno, va, size)
    523 	dev_t   dev;
    524 	daddr_t blkno;
    525 	caddr_t va;
    526 	size_t  size;
    527 {
    528 	/* Not implemented. */
    529 	return ENXIO;
    530 }
    531 /* ARGSUSED */
    532 int
    533 raidopen(dev, flags, fmt, p)
    534 	dev_t   dev;
    535 	int     flags, fmt;
    536 	struct proc *p;
    537 {
    538 	int     unit = raidunit(dev);
    539 	struct raid_softc *rs;
    540 	struct disklabel *lp;
    541 	int     part, pmask;
    542 	int     error = 0;
    543 
    544 	if (unit >= numraid)
    545 		return (ENXIO);
    546 	rs = &raid_softc[unit];
    547 
    548 	if ((error = raidlock(rs)) != 0)
    549 		return (error);
    550 	lp = rs->sc_dkdev.dk_label;
    551 
    552 	part = DISKPART(dev);
    553 	pmask = (1 << part);
    554 
    555 	db1_printf(("Opening raid device number: %d partition: %d\n",
    556 		unit, part));
    557 
    558 
    559 	if ((rs->sc_flags & RAIDF_INITED) &&
    560 	    (rs->sc_dkdev.dk_openmask == 0))
    561 		raidgetdisklabel(dev);
    562 
    563 	/* make sure that this partition exists */
    564 
    565 	if (part != RAW_PART) {
    566 		db1_printf(("Not a raw partition..\n"));
    567 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    568 		    ((part >= lp->d_npartitions) ||
    569 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    570 			error = ENXIO;
    571 			raidunlock(rs);
    572 			db1_printf(("Bailing out...\n"));
    573 			return (error);
    574 		}
    575 	}
    576 	/* Prevent this unit from being unconfigured while open. */
    577 	switch (fmt) {
    578 	case S_IFCHR:
    579 		rs->sc_dkdev.dk_copenmask |= pmask;
    580 		break;
    581 
    582 	case S_IFBLK:
    583 		rs->sc_dkdev.dk_bopenmask |= pmask;
    584 		break;
    585 	}
    586 
    587 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    588 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    589 		/* First one... mark things as dirty... Note that we *MUST*
    590 		 have done a configure before this.  I DO NOT WANT TO BE
    591 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    592 		 THAT THEY BELONG TOGETHER!!!!! */
    593 		/* XXX should check to see if we're only open for reading
    594 		   here... If so, we needn't do this, but then need some
    595 		   other way of keeping track of what's happened.. */
    596 
    597 		rf_markalldirty( raidPtrs[unit] );
    598 	}
    599 
    600 
    601 	rs->sc_dkdev.dk_openmask =
    602 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    603 
    604 	raidunlock(rs);
    605 
    606 	return (error);
    607 
    608 
    609 }
    610 /* ARGSUSED */
    611 int
    612 raidclose(dev, flags, fmt, p)
    613 	dev_t   dev;
    614 	int     flags, fmt;
    615 	struct proc *p;
    616 {
    617 	int     unit = raidunit(dev);
    618 	struct raid_softc *rs;
    619 	int     error = 0;
    620 	int     part;
    621 
    622 	if (unit >= numraid)
    623 		return (ENXIO);
    624 	rs = &raid_softc[unit];
    625 
    626 	if ((error = raidlock(rs)) != 0)
    627 		return (error);
    628 
    629 	part = DISKPART(dev);
    630 
    631 	/* ...that much closer to allowing unconfiguration... */
    632 	switch (fmt) {
    633 	case S_IFCHR:
    634 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    635 		break;
    636 
    637 	case S_IFBLK:
    638 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    639 		break;
    640 	}
    641 	rs->sc_dkdev.dk_openmask =
    642 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    643 
    644 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    645 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    646 		/* Last one... device is not unconfigured yet.
    647 		   Device shutdown has taken care of setting the
    648 		   clean bits if RAIDF_INITED is not set
    649 		   mark things as clean... */
    650 #if 0
    651 		printf("Last one on raid%d.  Updating status.\n",unit);
    652 #endif
    653 		rf_update_component_labels(raidPtrs[unit],
    654 						 RF_FINAL_COMPONENT_UPDATE);
    655 		if (doing_shutdown) {
    656 			/* last one, and we're going down, so
    657 			   lights out for this RAID set too. */
    658 			error = rf_Shutdown(raidPtrs[unit]);
    659 
    660 			/* It's no longer initialized... */
    661 			rs->sc_flags &= ~RAIDF_INITED;
    662 
    663 			/* Detach the disk. */
    664 			disk_detach(&rs->sc_dkdev);
    665 		}
    666 	}
    667 
    668 	raidunlock(rs);
    669 	return (0);
    670 
    671 }
    672 
    673 void
    674 raidstrategy(bp)
    675 	struct buf *bp;
    676 {
    677 	int s;
    678 
    679 	unsigned int raidID = raidunit(bp->b_dev);
    680 	RF_Raid_t *raidPtr;
    681 	struct raid_softc *rs = &raid_softc[raidID];
    682 	struct disklabel *lp;
    683 	int     wlabel;
    684 
    685 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    686 		bp->b_error = ENXIO;
    687 		bp->b_flags |= B_ERROR;
    688 		bp->b_resid = bp->b_bcount;
    689 		biodone(bp);
    690 		return;
    691 	}
    692 	if (raidID >= numraid || !raidPtrs[raidID]) {
    693 		bp->b_error = ENODEV;
    694 		bp->b_flags |= B_ERROR;
    695 		bp->b_resid = bp->b_bcount;
    696 		biodone(bp);
    697 		return;
    698 	}
    699 	raidPtr = raidPtrs[raidID];
    700 	if (!raidPtr->valid) {
    701 		bp->b_error = ENODEV;
    702 		bp->b_flags |= B_ERROR;
    703 		bp->b_resid = bp->b_bcount;
    704 		biodone(bp);
    705 		return;
    706 	}
    707 	if (bp->b_bcount == 0) {
    708 		db1_printf(("b_bcount is zero..\n"));
    709 		biodone(bp);
    710 		return;
    711 	}
    712 	lp = rs->sc_dkdev.dk_label;
    713 
    714 	/*
    715 	 * Do bounds checking and adjust transfer.  If there's an
    716 	 * error, the bounds check will flag that for us.
    717 	 */
    718 
    719 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    720 	if (DISKPART(bp->b_dev) != RAW_PART)
    721 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    722 			db1_printf(("Bounds check failed!!:%d %d\n",
    723 				(int) bp->b_blkno, (int) wlabel));
    724 			biodone(bp);
    725 			return;
    726 		}
    727 	s = splbio();
    728 
    729 	bp->b_resid = 0;
    730 
    731 	/* stuff it onto our queue */
    732 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    733 
    734 	raidstart(raidPtrs[raidID]);
    735 
    736 	splx(s);
    737 }
    738 /* ARGSUSED */
    739 int
    740 raidread(dev, uio, flags)
    741 	dev_t   dev;
    742 	struct uio *uio;
    743 	int     flags;
    744 {
    745 	int     unit = raidunit(dev);
    746 	struct raid_softc *rs;
    747 	int     part;
    748 
    749 	if (unit >= numraid)
    750 		return (ENXIO);
    751 	rs = &raid_softc[unit];
    752 
    753 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    754 		return (ENXIO);
    755 	part = DISKPART(dev);
    756 
    757 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    758 
    759 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    760 
    761 }
    762 /* ARGSUSED */
    763 int
    764 raidwrite(dev, uio, flags)
    765 	dev_t   dev;
    766 	struct uio *uio;
    767 	int     flags;
    768 {
    769 	int     unit = raidunit(dev);
    770 	struct raid_softc *rs;
    771 
    772 	if (unit >= numraid)
    773 		return (ENXIO);
    774 	rs = &raid_softc[unit];
    775 
    776 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    777 		return (ENXIO);
    778 	db1_printf(("raidwrite\n"));
    779 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    780 
    781 }
    782 
    783 int
    784 raidioctl(dev, cmd, data, flag, p)
    785 	dev_t   dev;
    786 	u_long  cmd;
    787 	caddr_t data;
    788 	int     flag;
    789 	struct proc *p;
    790 {
    791 	int     unit = raidunit(dev);
    792 	int     error = 0;
    793 	int     part, pmask;
    794 	struct raid_softc *rs;
    795 	RF_Config_t *k_cfg, *u_cfg;
    796 	RF_Raid_t *raidPtr;
    797 	RF_RaidDisk_t *diskPtr;
    798 	RF_AccTotals_t *totals;
    799 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    800 	u_char *specific_buf;
    801 	int retcode = 0;
    802 	int row;
    803 	int column;
    804 	struct rf_recon_req *rrcopy, *rr;
    805 	RF_ComponentLabel_t *clabel;
    806 	RF_ComponentLabel_t ci_label;
    807 	RF_ComponentLabel_t **clabel_ptr;
    808 	RF_SingleComponent_t *sparePtr,*componentPtr;
    809 	RF_SingleComponent_t hot_spare;
    810 	RF_SingleComponent_t component;
    811 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    812 	int i, j, d;
    813 #ifdef __HAVE_OLD_DISKLABEL
    814 	struct disklabel newlabel;
    815 #endif
    816 
    817 	if (unit >= numraid)
    818 		return (ENXIO);
    819 	rs = &raid_softc[unit];
    820 	raidPtr = raidPtrs[unit];
    821 
    822 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    823 		(int) DISKPART(dev), (int) unit, (int) cmd));
    824 
    825 	/* Must be open for writes for these commands... */
    826 	switch (cmd) {
    827 	case DIOCSDINFO:
    828 	case DIOCWDINFO:
    829 #ifdef __HAVE_OLD_DISKLABEL
    830 	case ODIOCWDINFO:
    831 	case ODIOCSDINFO:
    832 #endif
    833 	case DIOCWLABEL:
    834 		if ((flag & FWRITE) == 0)
    835 			return (EBADF);
    836 	}
    837 
    838 	/* Must be initialized for these... */
    839 	switch (cmd) {
    840 	case DIOCGDINFO:
    841 	case DIOCSDINFO:
    842 	case DIOCWDINFO:
    843 #ifdef __HAVE_OLD_DISKLABEL
    844 	case ODIOCGDINFO:
    845 	case ODIOCWDINFO:
    846 	case ODIOCSDINFO:
    847 	case ODIOCGDEFLABEL:
    848 #endif
    849 	case DIOCGPART:
    850 	case DIOCWLABEL:
    851 	case DIOCGDEFLABEL:
    852 	case RAIDFRAME_SHUTDOWN:
    853 	case RAIDFRAME_REWRITEPARITY:
    854 	case RAIDFRAME_GET_INFO:
    855 	case RAIDFRAME_RESET_ACCTOTALS:
    856 	case RAIDFRAME_GET_ACCTOTALS:
    857 	case RAIDFRAME_KEEP_ACCTOTALS:
    858 	case RAIDFRAME_GET_SIZE:
    859 	case RAIDFRAME_FAIL_DISK:
    860 	case RAIDFRAME_COPYBACK:
    861 	case RAIDFRAME_CHECK_RECON_STATUS:
    862 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    863 	case RAIDFRAME_GET_COMPONENT_LABEL:
    864 	case RAIDFRAME_SET_COMPONENT_LABEL:
    865 	case RAIDFRAME_ADD_HOT_SPARE:
    866 	case RAIDFRAME_REMOVE_HOT_SPARE:
    867 	case RAIDFRAME_INIT_LABELS:
    868 	case RAIDFRAME_REBUILD_IN_PLACE:
    869 	case RAIDFRAME_CHECK_PARITY:
    870 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    871 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    872 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    873 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    874 	case RAIDFRAME_SET_AUTOCONFIG:
    875 	case RAIDFRAME_SET_ROOT:
    876 	case RAIDFRAME_DELETE_COMPONENT:
    877 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    878 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    879 			return (ENXIO);
    880 	}
    881 
    882 	switch (cmd) {
    883 
    884 		/* configure the system */
    885 	case RAIDFRAME_CONFIGURE:
    886 
    887 		if (raidPtr->valid) {
    888 			/* There is a valid RAID set running on this unit! */
    889 			printf("raid%d: Device already configured!\n",unit);
    890 			return(EINVAL);
    891 		}
    892 
    893 		/* copy-in the configuration information */
    894 		/* data points to a pointer to the configuration structure */
    895 
    896 		u_cfg = *((RF_Config_t **) data);
    897 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    898 		if (k_cfg == NULL) {
    899 			return (ENOMEM);
    900 		}
    901 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    902 		    sizeof(RF_Config_t));
    903 		if (retcode) {
    904 			RF_Free(k_cfg, sizeof(RF_Config_t));
    905 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    906 				retcode));
    907 			return (retcode);
    908 		}
    909 		/* allocate a buffer for the layout-specific data, and copy it
    910 		 * in */
    911 		if (k_cfg->layoutSpecificSize) {
    912 			if (k_cfg->layoutSpecificSize > 10000) {
    913 				/* sanity check */
    914 				RF_Free(k_cfg, sizeof(RF_Config_t));
    915 				return (EINVAL);
    916 			}
    917 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    918 			    (u_char *));
    919 			if (specific_buf == NULL) {
    920 				RF_Free(k_cfg, sizeof(RF_Config_t));
    921 				return (ENOMEM);
    922 			}
    923 			retcode = copyin(k_cfg->layoutSpecific,
    924 			    (caddr_t) specific_buf,
    925 			    k_cfg->layoutSpecificSize);
    926 			if (retcode) {
    927 				RF_Free(k_cfg, sizeof(RF_Config_t));
    928 				RF_Free(specific_buf,
    929 					k_cfg->layoutSpecificSize);
    930 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    931 					retcode));
    932 				return (retcode);
    933 			}
    934 		} else
    935 			specific_buf = NULL;
    936 		k_cfg->layoutSpecific = specific_buf;
    937 
    938 		/* should do some kind of sanity check on the configuration.
    939 		 * Store the sum of all the bytes in the last byte? */
    940 
    941 		/* configure the system */
    942 
    943 		/*
    944 		 * Clear the entire RAID descriptor, just to make sure
    945 		 *  there is no stale data left in the case of a
    946 		 *  reconfiguration
    947 		 */
    948 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    949 		raidPtr->raidid = unit;
    950 
    951 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    952 
    953 		if (retcode == 0) {
    954 
    955 			/* allow this many simultaneous IO's to
    956 			   this RAID device */
    957 			raidPtr->openings = RAIDOUTSTANDING;
    958 
    959 			raidinit(raidPtr);
    960 			rf_markalldirty(raidPtr);
    961 		}
    962 		/* free the buffers.  No return code here. */
    963 		if (k_cfg->layoutSpecificSize) {
    964 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    965 		}
    966 		RF_Free(k_cfg, sizeof(RF_Config_t));
    967 
    968 		return (retcode);
    969 
    970 		/* shutdown the system */
    971 	case RAIDFRAME_SHUTDOWN:
    972 
    973 		if ((error = raidlock(rs)) != 0)
    974 			return (error);
    975 
    976 		/*
    977 		 * If somebody has a partition mounted, we shouldn't
    978 		 * shutdown.
    979 		 */
    980 
    981 		part = DISKPART(dev);
    982 		pmask = (1 << part);
    983 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    984 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    985 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    986 			raidunlock(rs);
    987 			return (EBUSY);
    988 		}
    989 
    990 		retcode = rf_Shutdown(raidPtr);
    991 
    992 		/* It's no longer initialized... */
    993 		rs->sc_flags &= ~RAIDF_INITED;
    994 
    995 		/* Detach the disk. */
    996 		disk_detach(&rs->sc_dkdev);
    997 
    998 		raidunlock(rs);
    999 
   1000 		return (retcode);
   1001 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1002 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1003 		/* need to read the component label for the disk indicated
   1004 		   by row,column in clabel */
   1005 
   1006 		/* For practice, let's get it directly fromdisk, rather
   1007 		   than from the in-core copy */
   1008 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1009 			   (RF_ComponentLabel_t *));
   1010 		if (clabel == NULL)
   1011 			return (ENOMEM);
   1012 
   1013 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1014 
   1015 		retcode = copyin( *clabel_ptr, clabel,
   1016 				  sizeof(RF_ComponentLabel_t));
   1017 
   1018 		if (retcode) {
   1019 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1020 			return(retcode);
   1021 		}
   1022 
   1023 		row = clabel->row;
   1024 		column = clabel->column;
   1025 
   1026 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1027 		    (column < 0) || (column >= raidPtr->numCol +
   1028 				     raidPtr->numSpare)) {
   1029 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1030 			return(EINVAL);
   1031 		}
   1032 
   1033 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1034 				raidPtr->raid_cinfo[row][column].ci_vp,
   1035 				clabel );
   1036 
   1037 		retcode = copyout((caddr_t) clabel,
   1038 				  (caddr_t) *clabel_ptr,
   1039 				  sizeof(RF_ComponentLabel_t));
   1040 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1041 		return (retcode);
   1042 
   1043 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1044 		clabel = (RF_ComponentLabel_t *) data;
   1045 
   1046 		/* XXX check the label for valid stuff... */
   1047 		/* Note that some things *should not* get modified --
   1048 		   the user should be re-initing the labels instead of
   1049 		   trying to patch things.
   1050 		   */
   1051 
   1052 		printf("Got component label:\n");
   1053 		printf("Version: %d\n",clabel->version);
   1054 		printf("Serial Number: %d\n",clabel->serial_number);
   1055 		printf("Mod counter: %d\n",clabel->mod_counter);
   1056 		printf("Row: %d\n", clabel->row);
   1057 		printf("Column: %d\n", clabel->column);
   1058 		printf("Num Rows: %d\n", clabel->num_rows);
   1059 		printf("Num Columns: %d\n", clabel->num_columns);
   1060 		printf("Clean: %d\n", clabel->clean);
   1061 		printf("Status: %d\n", clabel->status);
   1062 
   1063 		row = clabel->row;
   1064 		column = clabel->column;
   1065 
   1066 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1067 		    (column < 0) || (column >= raidPtr->numCol)) {
   1068 			return(EINVAL);
   1069 		}
   1070 
   1071 		/* XXX this isn't allowed to do anything for now :-) */
   1072 
   1073 		/* XXX and before it is, we need to fill in the rest
   1074 		   of the fields!?!?!?! */
   1075 #if 0
   1076 		raidwrite_component_label(
   1077                             raidPtr->Disks[row][column].dev,
   1078 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1079 			    clabel );
   1080 #endif
   1081 		return (0);
   1082 
   1083 	case RAIDFRAME_INIT_LABELS:
   1084 		clabel = (RF_ComponentLabel_t *) data;
   1085 		/*
   1086 		   we only want the serial number from
   1087 		   the above.  We get all the rest of the information
   1088 		   from the config that was used to create this RAID
   1089 		   set.
   1090 		   */
   1091 
   1092 		raidPtr->serial_number = clabel->serial_number;
   1093 
   1094 		raid_init_component_label(raidPtr, &ci_label);
   1095 		ci_label.serial_number = clabel->serial_number;
   1096 
   1097 		for(row=0;row<raidPtr->numRow;row++) {
   1098 			ci_label.row = row;
   1099 			for(column=0;column<raidPtr->numCol;column++) {
   1100 				diskPtr = &raidPtr->Disks[row][column];
   1101 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1102 					ci_label.partitionSize = diskPtr->partitionSize;
   1103 					ci_label.column = column;
   1104 					raidwrite_component_label(
   1105 					  raidPtr->Disks[row][column].dev,
   1106 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1107 					  &ci_label );
   1108 				}
   1109 			}
   1110 		}
   1111 
   1112 		return (retcode);
   1113 	case RAIDFRAME_SET_AUTOCONFIG:
   1114 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1115 		printf("New autoconfig value is: %d\n", d);
   1116 		*(int *) data = d;
   1117 		return (retcode);
   1118 
   1119 	case RAIDFRAME_SET_ROOT:
   1120 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1121 		printf("New rootpartition value is: %d\n", d);
   1122 		*(int *) data = d;
   1123 		return (retcode);
   1124 
   1125 		/* initialize all parity */
   1126 	case RAIDFRAME_REWRITEPARITY:
   1127 
   1128 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1129 			/* Parity for RAID 0 is trivially correct */
   1130 			raidPtr->parity_good = RF_RAID_CLEAN;
   1131 			return(0);
   1132 		}
   1133 
   1134 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1135 			/* Re-write is already in progress! */
   1136 			return(EINVAL);
   1137 		}
   1138 
   1139 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1140 					   rf_RewriteParityThread,
   1141 					   raidPtr,"raid_parity");
   1142 		return (retcode);
   1143 
   1144 
   1145 	case RAIDFRAME_ADD_HOT_SPARE:
   1146 		sparePtr = (RF_SingleComponent_t *) data;
   1147 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1148 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1149 		return(retcode);
   1150 
   1151 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1152 		return(retcode);
   1153 
   1154 	case RAIDFRAME_DELETE_COMPONENT:
   1155 		componentPtr = (RF_SingleComponent_t *)data;
   1156 		memcpy( &component, componentPtr,
   1157 			sizeof(RF_SingleComponent_t));
   1158 		retcode = rf_delete_component(raidPtr, &component);
   1159 		return(retcode);
   1160 
   1161 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1162 		componentPtr = (RF_SingleComponent_t *)data;
   1163 		memcpy( &component, componentPtr,
   1164 			sizeof(RF_SingleComponent_t));
   1165 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1166 		return(retcode);
   1167 
   1168 	case RAIDFRAME_REBUILD_IN_PLACE:
   1169 
   1170 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1171 			/* Can't do this on a RAID 0!! */
   1172 			return(EINVAL);
   1173 		}
   1174 
   1175 		if (raidPtr->recon_in_progress == 1) {
   1176 			/* a reconstruct is already in progress! */
   1177 			return(EINVAL);
   1178 		}
   1179 
   1180 		componentPtr = (RF_SingleComponent_t *) data;
   1181 		memcpy( &component, componentPtr,
   1182 			sizeof(RF_SingleComponent_t));
   1183 		row = component.row;
   1184 		column = component.column;
   1185 		printf("Rebuild: %d %d\n",row, column);
   1186 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1187 		    (column < 0) || (column >= raidPtr->numCol)) {
   1188 			return(EINVAL);
   1189 		}
   1190 
   1191 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1192 		if (rrcopy == NULL)
   1193 			return(ENOMEM);
   1194 
   1195 		rrcopy->raidPtr = (void *) raidPtr;
   1196 		rrcopy->row = row;
   1197 		rrcopy->col = column;
   1198 
   1199 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1200 					   rf_ReconstructInPlaceThread,
   1201 					   rrcopy,"raid_reconip");
   1202 		return(retcode);
   1203 
   1204 	case RAIDFRAME_GET_INFO:
   1205 		if (!raidPtr->valid)
   1206 			return (ENODEV);
   1207 		ucfgp = (RF_DeviceConfig_t **) data;
   1208 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1209 			  (RF_DeviceConfig_t *));
   1210 		if (d_cfg == NULL)
   1211 			return (ENOMEM);
   1212 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1213 		d_cfg->rows = raidPtr->numRow;
   1214 		d_cfg->cols = raidPtr->numCol;
   1215 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1216 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1217 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1218 			return (ENOMEM);
   1219 		}
   1220 		d_cfg->nspares = raidPtr->numSpare;
   1221 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1222 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1223 			return (ENOMEM);
   1224 		}
   1225 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1226 		d = 0;
   1227 		for (i = 0; i < d_cfg->rows; i++) {
   1228 			for (j = 0; j < d_cfg->cols; j++) {
   1229 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1230 				d++;
   1231 			}
   1232 		}
   1233 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1234 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1235 		}
   1236 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1237 				  sizeof(RF_DeviceConfig_t));
   1238 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1239 
   1240 		return (retcode);
   1241 
   1242 	case RAIDFRAME_CHECK_PARITY:
   1243 		*(int *) data = raidPtr->parity_good;
   1244 		return (0);
   1245 
   1246 	case RAIDFRAME_RESET_ACCTOTALS:
   1247 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1248 		return (0);
   1249 
   1250 	case RAIDFRAME_GET_ACCTOTALS:
   1251 		totals = (RF_AccTotals_t *) data;
   1252 		*totals = raidPtr->acc_totals;
   1253 		return (0);
   1254 
   1255 	case RAIDFRAME_KEEP_ACCTOTALS:
   1256 		raidPtr->keep_acc_totals = *(int *)data;
   1257 		return (0);
   1258 
   1259 	case RAIDFRAME_GET_SIZE:
   1260 		*(int *) data = raidPtr->totalSectors;
   1261 		return (0);
   1262 
   1263 		/* fail a disk & optionally start reconstruction */
   1264 	case RAIDFRAME_FAIL_DISK:
   1265 
   1266 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1267 			/* Can't do this on a RAID 0!! */
   1268 			return(EINVAL);
   1269 		}
   1270 
   1271 		rr = (struct rf_recon_req *) data;
   1272 
   1273 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1274 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1275 			return (EINVAL);
   1276 
   1277 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1278 		       unit, rr->row, rr->col);
   1279 
   1280 		/* make a copy of the recon request so that we don't rely on
   1281 		 * the user's buffer */
   1282 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1283 		if (rrcopy == NULL)
   1284 			return(ENOMEM);
   1285 		memcpy(rrcopy, rr, sizeof(*rr));
   1286 		rrcopy->raidPtr = (void *) raidPtr;
   1287 
   1288 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1289 					   rf_ReconThread,
   1290 					   rrcopy,"raid_recon");
   1291 		return (0);
   1292 
   1293 		/* invoke a copyback operation after recon on whatever disk
   1294 		 * needs it, if any */
   1295 	case RAIDFRAME_COPYBACK:
   1296 
   1297 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1298 			/* This makes no sense on a RAID 0!! */
   1299 			return(EINVAL);
   1300 		}
   1301 
   1302 		if (raidPtr->copyback_in_progress == 1) {
   1303 			/* Copyback is already in progress! */
   1304 			return(EINVAL);
   1305 		}
   1306 
   1307 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1308 					   rf_CopybackThread,
   1309 					   raidPtr,"raid_copyback");
   1310 		return (retcode);
   1311 
   1312 		/* return the percentage completion of reconstruction */
   1313 	case RAIDFRAME_CHECK_RECON_STATUS:
   1314 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1315 			/* This makes no sense on a RAID 0, so tell the
   1316 			   user it's done. */
   1317 			*(int *) data = 100;
   1318 			return(0);
   1319 		}
   1320 		row = 0; /* XXX we only consider a single row... */
   1321 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1322 			*(int *) data = 100;
   1323 		else
   1324 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1325 		return (0);
   1326 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1327 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1328 		row = 0; /* XXX we only consider a single row... */
   1329 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1330 			progressInfo.remaining = 0;
   1331 			progressInfo.completed = 100;
   1332 			progressInfo.total = 100;
   1333 		} else {
   1334 			progressInfo.total =
   1335 				raidPtr->reconControl[row]->numRUsTotal;
   1336 			progressInfo.completed =
   1337 				raidPtr->reconControl[row]->numRUsComplete;
   1338 			progressInfo.remaining = progressInfo.total -
   1339 				progressInfo.completed;
   1340 		}
   1341 		retcode = copyout((caddr_t) &progressInfo,
   1342 				  (caddr_t) *progressInfoPtr,
   1343 				  sizeof(RF_ProgressInfo_t));
   1344 		return (retcode);
   1345 
   1346 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1347 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1348 			/* This makes no sense on a RAID 0, so tell the
   1349 			   user it's done. */
   1350 			*(int *) data = 100;
   1351 			return(0);
   1352 		}
   1353 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1354 			*(int *) data = 100 *
   1355 				raidPtr->parity_rewrite_stripes_done /
   1356 				raidPtr->Layout.numStripe;
   1357 		} else {
   1358 			*(int *) data = 100;
   1359 		}
   1360 		return (0);
   1361 
   1362 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1363 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1364 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1365 			progressInfo.total = raidPtr->Layout.numStripe;
   1366 			progressInfo.completed =
   1367 				raidPtr->parity_rewrite_stripes_done;
   1368 			progressInfo.remaining = progressInfo.total -
   1369 				progressInfo.completed;
   1370 		} else {
   1371 			progressInfo.remaining = 0;
   1372 			progressInfo.completed = 100;
   1373 			progressInfo.total = 100;
   1374 		}
   1375 		retcode = copyout((caddr_t) &progressInfo,
   1376 				  (caddr_t) *progressInfoPtr,
   1377 				  sizeof(RF_ProgressInfo_t));
   1378 		return (retcode);
   1379 
   1380 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1381 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1382 			/* This makes no sense on a RAID 0 */
   1383 			*(int *) data = 100;
   1384 			return(0);
   1385 		}
   1386 		if (raidPtr->copyback_in_progress == 1) {
   1387 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1388 				raidPtr->Layout.numStripe;
   1389 		} else {
   1390 			*(int *) data = 100;
   1391 		}
   1392 		return (0);
   1393 
   1394 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1395 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1396 		if (raidPtr->copyback_in_progress == 1) {
   1397 			progressInfo.total = raidPtr->Layout.numStripe;
   1398 			progressInfo.completed =
   1399 				raidPtr->copyback_stripes_done;
   1400 			progressInfo.remaining = progressInfo.total -
   1401 				progressInfo.completed;
   1402 		} else {
   1403 			progressInfo.remaining = 0;
   1404 			progressInfo.completed = 100;
   1405 			progressInfo.total = 100;
   1406 		}
   1407 		retcode = copyout((caddr_t) &progressInfo,
   1408 				  (caddr_t) *progressInfoPtr,
   1409 				  sizeof(RF_ProgressInfo_t));
   1410 		return (retcode);
   1411 
   1412 		/* the sparetable daemon calls this to wait for the kernel to
   1413 		 * need a spare table. this ioctl does not return until a
   1414 		 * spare table is needed. XXX -- calling mpsleep here in the
   1415 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1416 		 * -- I should either compute the spare table in the kernel,
   1417 		 * or have a different -- XXX XXX -- interface (a different
   1418 		 * character device) for delivering the table     -- XXX */
   1419 #if 0
   1420 	case RAIDFRAME_SPARET_WAIT:
   1421 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1422 		while (!rf_sparet_wait_queue)
   1423 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1424 		waitreq = rf_sparet_wait_queue;
   1425 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1426 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1427 
   1428 		/* structure assignment */
   1429 		*((RF_SparetWait_t *) data) = *waitreq;
   1430 
   1431 		RF_Free(waitreq, sizeof(*waitreq));
   1432 		return (0);
   1433 
   1434 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1435 		 * code in it that will cause the dameon to exit */
   1436 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1437 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1438 		waitreq->fcol = -1;
   1439 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1440 		waitreq->next = rf_sparet_wait_queue;
   1441 		rf_sparet_wait_queue = waitreq;
   1442 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1443 		wakeup(&rf_sparet_wait_queue);
   1444 		return (0);
   1445 
   1446 		/* used by the spare table daemon to deliver a spare table
   1447 		 * into the kernel */
   1448 	case RAIDFRAME_SEND_SPARET:
   1449 
   1450 		/* install the spare table */
   1451 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1452 
   1453 		/* respond to the requestor.  the return status of the spare
   1454 		 * table installation is passed in the "fcol" field */
   1455 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1456 		waitreq->fcol = retcode;
   1457 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1458 		waitreq->next = rf_sparet_resp_queue;
   1459 		rf_sparet_resp_queue = waitreq;
   1460 		wakeup(&rf_sparet_resp_queue);
   1461 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1462 
   1463 		return (retcode);
   1464 #endif
   1465 
   1466 	default:
   1467 		break; /* fall through to the os-specific code below */
   1468 
   1469 	}
   1470 
   1471 	if (!raidPtr->valid)
   1472 		return (EINVAL);
   1473 
   1474 	/*
   1475 	 * Add support for "regular" device ioctls here.
   1476 	 */
   1477 
   1478 	switch (cmd) {
   1479 	case DIOCGDINFO:
   1480 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1481 		break;
   1482 #ifdef __HAVE_OLD_DISKLABEL
   1483 	case ODIOCGDINFO:
   1484 		newlabel = *(rs->sc_dkdev.dk_label);
   1485 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1486 			return ENOTTY;
   1487 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1488 		break;
   1489 #endif
   1490 
   1491 	case DIOCGPART:
   1492 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1493 		((struct partinfo *) data)->part =
   1494 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1495 		break;
   1496 
   1497 	case DIOCWDINFO:
   1498 	case DIOCSDINFO:
   1499 #ifdef __HAVE_OLD_DISKLABEL
   1500 	case ODIOCWDINFO:
   1501 	case ODIOCSDINFO:
   1502 #endif
   1503 	{
   1504 		struct disklabel *lp;
   1505 #ifdef __HAVE_OLD_DISKLABEL
   1506 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1507 			memset(&newlabel, 0, sizeof newlabel);
   1508 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1509 			lp = &newlabel;
   1510 		} else
   1511 #endif
   1512 		lp = (struct disklabel *)data;
   1513 
   1514 		if ((error = raidlock(rs)) != 0)
   1515 			return (error);
   1516 
   1517 		rs->sc_flags |= RAIDF_LABELLING;
   1518 
   1519 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1520 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1521 		if (error == 0) {
   1522 			if (cmd == DIOCWDINFO
   1523 #ifdef __HAVE_OLD_DISKLABEL
   1524 			    || cmd == ODIOCWDINFO
   1525 #endif
   1526 			   )
   1527 				error = writedisklabel(RAIDLABELDEV(dev),
   1528 				    raidstrategy, rs->sc_dkdev.dk_label,
   1529 				    rs->sc_dkdev.dk_cpulabel);
   1530 		}
   1531 		rs->sc_flags &= ~RAIDF_LABELLING;
   1532 
   1533 		raidunlock(rs);
   1534 
   1535 		if (error)
   1536 			return (error);
   1537 		break;
   1538 	}
   1539 
   1540 	case DIOCWLABEL:
   1541 		if (*(int *) data != 0)
   1542 			rs->sc_flags |= RAIDF_WLABEL;
   1543 		else
   1544 			rs->sc_flags &= ~RAIDF_WLABEL;
   1545 		break;
   1546 
   1547 	case DIOCGDEFLABEL:
   1548 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1549 		break;
   1550 
   1551 #ifdef __HAVE_OLD_DISKLABEL
   1552 	case ODIOCGDEFLABEL:
   1553 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1554 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1555 			return ENOTTY;
   1556 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1557 		break;
   1558 #endif
   1559 
   1560 	default:
   1561 		retcode = ENOTTY;
   1562 	}
   1563 	return (retcode);
   1564 
   1565 }
   1566 
   1567 
   1568 /* raidinit -- complete the rest of the initialization for the
   1569    RAIDframe device.  */
   1570 
   1571 
   1572 static void
   1573 raidinit(raidPtr)
   1574 	RF_Raid_t *raidPtr;
   1575 {
   1576 	struct raid_softc *rs;
   1577 	int     unit;
   1578 
   1579 	unit = raidPtr->raidid;
   1580 
   1581 	rs = &raid_softc[unit];
   1582 
   1583 	/* XXX should check return code first... */
   1584 	rs->sc_flags |= RAIDF_INITED;
   1585 
   1586 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1587 
   1588 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1589 
   1590 	/* disk_attach actually creates space for the CPU disklabel, among
   1591 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1592 	 * with disklabels. */
   1593 
   1594 	disk_attach(&rs->sc_dkdev);
   1595 
   1596 	/* XXX There may be a weird interaction here between this, and
   1597 	 * protectedSectors, as used in RAIDframe.  */
   1598 
   1599 	rs->sc_size = raidPtr->totalSectors;
   1600 
   1601 }
   1602 
   1603 /* wake up the daemon & tell it to get us a spare table
   1604  * XXX
   1605  * the entries in the queues should be tagged with the raidPtr
   1606  * so that in the extremely rare case that two recons happen at once,
   1607  * we know for which device were requesting a spare table
   1608  * XXX
   1609  *
   1610  * XXX This code is not currently used. GO
   1611  */
   1612 int
   1613 rf_GetSpareTableFromDaemon(req)
   1614 	RF_SparetWait_t *req;
   1615 {
   1616 	int     retcode;
   1617 
   1618 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1619 	req->next = rf_sparet_wait_queue;
   1620 	rf_sparet_wait_queue = req;
   1621 	wakeup(&rf_sparet_wait_queue);
   1622 
   1623 	/* mpsleep unlocks the mutex */
   1624 	while (!rf_sparet_resp_queue) {
   1625 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1626 		    "raidframe getsparetable", 0);
   1627 	}
   1628 	req = rf_sparet_resp_queue;
   1629 	rf_sparet_resp_queue = req->next;
   1630 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1631 
   1632 	retcode = req->fcol;
   1633 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1634 					 * alloc'd */
   1635 	return (retcode);
   1636 }
   1637 
   1638 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1639  * bp & passes it down.
   1640  * any calls originating in the kernel must use non-blocking I/O
   1641  * do some extra sanity checking to return "appropriate" error values for
   1642  * certain conditions (to make some standard utilities work)
   1643  *
   1644  * Formerly known as: rf_DoAccessKernel
   1645  */
   1646 void
   1647 raidstart(raidPtr)
   1648 	RF_Raid_t *raidPtr;
   1649 {
   1650 	RF_SectorCount_t num_blocks, pb, sum;
   1651 	RF_RaidAddr_t raid_addr;
   1652 	int     retcode;
   1653 	struct partition *pp;
   1654 	daddr_t blocknum;
   1655 	int     unit;
   1656 	struct raid_softc *rs;
   1657 	int     do_async;
   1658 	struct buf *bp;
   1659 
   1660 	unit = raidPtr->raidid;
   1661 	rs = &raid_softc[unit];
   1662 
   1663 	/* quick check to see if anything has died recently */
   1664 	RF_LOCK_MUTEX(raidPtr->mutex);
   1665 	if (raidPtr->numNewFailures > 0) {
   1666 		rf_update_component_labels(raidPtr,
   1667 					   RF_NORMAL_COMPONENT_UPDATE);
   1668 		raidPtr->numNewFailures--;
   1669 	}
   1670 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1671 
   1672 	/* Check to see if we're at the limit... */
   1673 	RF_LOCK_MUTEX(raidPtr->mutex);
   1674 	while (raidPtr->openings > 0) {
   1675 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1676 
   1677 		/* get the next item, if any, from the queue */
   1678 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1679 			/* nothing more to do */
   1680 			return;
   1681 		}
   1682 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1683 
   1684 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1685 		 * partition.. Need to make it absolute to the underlying
   1686 		 * device.. */
   1687 
   1688 		blocknum = bp->b_blkno;
   1689 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1690 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1691 			blocknum += pp->p_offset;
   1692 		}
   1693 
   1694 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1695 			    (int) blocknum));
   1696 
   1697 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1698 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1699 
   1700 		/* *THIS* is where we adjust what block we're going to...
   1701 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1702 		raid_addr = blocknum;
   1703 
   1704 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1705 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1706 		sum = raid_addr + num_blocks + pb;
   1707 		if (1 || rf_debugKernelAccess) {
   1708 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1709 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1710 				    (int) pb, (int) bp->b_resid));
   1711 		}
   1712 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1713 		    || (sum < num_blocks) || (sum < pb)) {
   1714 			bp->b_error = ENOSPC;
   1715 			bp->b_flags |= B_ERROR;
   1716 			bp->b_resid = bp->b_bcount;
   1717 			biodone(bp);
   1718 			RF_LOCK_MUTEX(raidPtr->mutex);
   1719 			continue;
   1720 		}
   1721 		/*
   1722 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1723 		 */
   1724 
   1725 		if (bp->b_bcount & raidPtr->sectorMask) {
   1726 			bp->b_error = EINVAL;
   1727 			bp->b_flags |= B_ERROR;
   1728 			bp->b_resid = bp->b_bcount;
   1729 			biodone(bp);
   1730 			RF_LOCK_MUTEX(raidPtr->mutex);
   1731 			continue;
   1732 
   1733 		}
   1734 		db1_printf(("Calling DoAccess..\n"));
   1735 
   1736 
   1737 		RF_LOCK_MUTEX(raidPtr->mutex);
   1738 		raidPtr->openings--;
   1739 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1740 
   1741 		/*
   1742 		 * Everything is async.
   1743 		 */
   1744 		do_async = 1;
   1745 
   1746 		disk_busy(&rs->sc_dkdev);
   1747 
   1748 		/* XXX we're still at splbio() here... do we *really*
   1749 		   need to be? */
   1750 
   1751 		/* don't ever condition on bp->b_flags & B_WRITE.
   1752 		 * always condition on B_READ instead */
   1753 
   1754 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1755 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1756 				      do_async, raid_addr, num_blocks,
   1757 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1758 
   1759 		RF_LOCK_MUTEX(raidPtr->mutex);
   1760 	}
   1761 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1762 }
   1763 
   1764 
   1765 
   1766 
   1767 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1768 
   1769 int
   1770 rf_DispatchKernelIO(queue, req)
   1771 	RF_DiskQueue_t *queue;
   1772 	RF_DiskQueueData_t *req;
   1773 {
   1774 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1775 	struct buf *bp;
   1776 	struct raidbuf *raidbp = NULL;
   1777 	struct raid_softc *rs;
   1778 	int     unit;
   1779 	int s;
   1780 
   1781 	s=0;
   1782 	/* s = splbio();*/ /* want to test this */
   1783 	/* XXX along with the vnode, we also need the softc associated with
   1784 	 * this device.. */
   1785 
   1786 	req->queue = queue;
   1787 
   1788 	unit = queue->raidPtr->raidid;
   1789 
   1790 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1791 
   1792 	if (unit >= numraid) {
   1793 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1794 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1795 	}
   1796 	rs = &raid_softc[unit];
   1797 
   1798 	bp = req->bp;
   1799 #if 1
   1800 	/* XXX when there is a physical disk failure, someone is passing us a
   1801 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1802 	 * without taking a performance hit... (not sure where the real bug
   1803 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1804 
   1805 	if (bp->b_flags & B_ERROR) {
   1806 		bp->b_flags &= ~B_ERROR;
   1807 	}
   1808 	if (bp->b_error != 0) {
   1809 		bp->b_error = 0;
   1810 	}
   1811 #endif
   1812 	raidbp = RAIDGETBUF(rs);
   1813 
   1814 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1815 
   1816 	/*
   1817 	 * context for raidiodone
   1818 	 */
   1819 	raidbp->rf_obp = bp;
   1820 	raidbp->req = req;
   1821 
   1822 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1823 
   1824 	switch (req->type) {
   1825 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1826 		/* XXX need to do something extra here.. */
   1827 		/* I'm leaving this in, as I've never actually seen it used,
   1828 		 * and I'd like folks to report it... GO */
   1829 		printf(("WAKEUP CALLED\n"));
   1830 		queue->numOutstanding++;
   1831 
   1832 		/* XXX need to glue the original buffer into this??  */
   1833 
   1834 		KernelWakeupFunc(&raidbp->rf_buf);
   1835 		break;
   1836 
   1837 	case RF_IO_TYPE_READ:
   1838 	case RF_IO_TYPE_WRITE:
   1839 
   1840 		if (req->tracerec) {
   1841 			RF_ETIMER_START(req->tracerec->timer);
   1842 		}
   1843 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1844 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1845 		    req->sectorOffset, req->numSector,
   1846 		    req->buf, KernelWakeupFunc, (void *) req,
   1847 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1848 
   1849 		if (rf_debugKernelAccess) {
   1850 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1851 				(long) bp->b_blkno));
   1852 		}
   1853 		queue->numOutstanding++;
   1854 		queue->last_deq_sector = req->sectorOffset;
   1855 		/* acc wouldn't have been let in if there were any pending
   1856 		 * reqs at any other priority */
   1857 		queue->curPriority = req->priority;
   1858 
   1859 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1860 			req->type, unit, queue->row, queue->col));
   1861 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1862 			(int) req->sectorOffset, (int) req->numSector,
   1863 			(int) (req->numSector <<
   1864 			    queue->raidPtr->logBytesPerSector),
   1865 			(int) queue->raidPtr->logBytesPerSector));
   1866 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1867 			raidbp->rf_buf.b_vp->v_numoutput++;
   1868 		}
   1869 		VOP_STRATEGY(&raidbp->rf_buf);
   1870 
   1871 		break;
   1872 
   1873 	default:
   1874 		panic("bad req->type in rf_DispatchKernelIO");
   1875 	}
   1876 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1877 	/* splx(s); */ /* want to test this */
   1878 	return (0);
   1879 }
   1880 /* this is the callback function associated with a I/O invoked from
   1881    kernel code.
   1882  */
   1883 static void
   1884 KernelWakeupFunc(vbp)
   1885 	struct buf *vbp;
   1886 {
   1887 	RF_DiskQueueData_t *req = NULL;
   1888 	RF_DiskQueue_t *queue;
   1889 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1890 	struct buf *bp;
   1891 	struct raid_softc *rs;
   1892 	int     unit;
   1893 	int s;
   1894 
   1895 	s = splbio();
   1896 	db1_printf(("recovering the request queue:\n"));
   1897 	req = raidbp->req;
   1898 
   1899 	bp = raidbp->rf_obp;
   1900 
   1901 	queue = (RF_DiskQueue_t *) req->queue;
   1902 
   1903 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1904 		bp->b_flags |= B_ERROR;
   1905 		bp->b_error = raidbp->rf_buf.b_error ?
   1906 		    raidbp->rf_buf.b_error : EIO;
   1907 	}
   1908 
   1909 	/* XXX methinks this could be wrong... */
   1910 #if 1
   1911 	bp->b_resid = raidbp->rf_buf.b_resid;
   1912 #endif
   1913 
   1914 	if (req->tracerec) {
   1915 		RF_ETIMER_STOP(req->tracerec->timer);
   1916 		RF_ETIMER_EVAL(req->tracerec->timer);
   1917 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1918 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1919 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1920 		req->tracerec->num_phys_ios++;
   1921 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1922 	}
   1923 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1924 
   1925 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1926 
   1927 
   1928 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1929 	 * ballistic, and mark the component as hosed... */
   1930 
   1931 	if (bp->b_flags & B_ERROR) {
   1932 		/* Mark the disk as dead */
   1933 		/* but only mark it once... */
   1934 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1935 		    rf_ds_optimal) {
   1936 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1937 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1938 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1939 			    rf_ds_failed;
   1940 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1941 			queue->raidPtr->numFailures++;
   1942 			queue->raidPtr->numNewFailures++;
   1943 		} else {	/* Disk is already dead... */
   1944 			/* printf("Disk already marked as dead!\n"); */
   1945 		}
   1946 
   1947 	}
   1948 
   1949 	rs = &raid_softc[unit];
   1950 	RAIDPUTBUF(rs, raidbp);
   1951 
   1952 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1953 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1954 
   1955 	splx(s);
   1956 }
   1957 
   1958 
   1959 
   1960 /*
   1961  * initialize a buf structure for doing an I/O in the kernel.
   1962  */
   1963 static void
   1964 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1965        logBytesPerSector, b_proc)
   1966 	struct buf *bp;
   1967 	struct vnode *b_vp;
   1968 	unsigned rw_flag;
   1969 	dev_t dev;
   1970 	RF_SectorNum_t startSect;
   1971 	RF_SectorCount_t numSect;
   1972 	caddr_t buf;
   1973 	void (*cbFunc) (struct buf *);
   1974 	void *cbArg;
   1975 	int logBytesPerSector;
   1976 	struct proc *b_proc;
   1977 {
   1978 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1979 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1980 	bp->b_bcount = numSect << logBytesPerSector;
   1981 	bp->b_bufsize = bp->b_bcount;
   1982 	bp->b_error = 0;
   1983 	bp->b_dev = dev;
   1984 	bp->b_data = buf;
   1985 	bp->b_blkno = startSect;
   1986 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1987 	if (bp->b_bcount == 0) {
   1988 		panic("bp->b_bcount is zero in InitBP!!\n");
   1989 	}
   1990 	bp->b_proc = b_proc;
   1991 	bp->b_iodone = cbFunc;
   1992 	bp->b_vp = b_vp;
   1993 
   1994 }
   1995 
   1996 static void
   1997 raidgetdefaultlabel(raidPtr, rs, lp)
   1998 	RF_Raid_t *raidPtr;
   1999 	struct raid_softc *rs;
   2000 	struct disklabel *lp;
   2001 {
   2002 	db1_printf(("Building a default label...\n"));
   2003 	memset(lp, 0, sizeof(*lp));
   2004 
   2005 	/* fabricate a label... */
   2006 	lp->d_secperunit = raidPtr->totalSectors;
   2007 	lp->d_secsize = raidPtr->bytesPerSector;
   2008 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2009 	lp->d_ntracks = 4 * raidPtr->numCol;
   2010 	lp->d_ncylinders = raidPtr->totalSectors /
   2011 		(lp->d_nsectors * lp->d_ntracks);
   2012 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2013 
   2014 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2015 	lp->d_type = DTYPE_RAID;
   2016 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2017 	lp->d_rpm = 3600;
   2018 	lp->d_interleave = 1;
   2019 	lp->d_flags = 0;
   2020 
   2021 	lp->d_partitions[RAW_PART].p_offset = 0;
   2022 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2023 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2024 	lp->d_npartitions = RAW_PART + 1;
   2025 
   2026 	lp->d_magic = DISKMAGIC;
   2027 	lp->d_magic2 = DISKMAGIC;
   2028 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2029 
   2030 }
   2031 /*
   2032  * Read the disklabel from the raid device.  If one is not present, fake one
   2033  * up.
   2034  */
   2035 static void
   2036 raidgetdisklabel(dev)
   2037 	dev_t   dev;
   2038 {
   2039 	int     unit = raidunit(dev);
   2040 	struct raid_softc *rs = &raid_softc[unit];
   2041 	char   *errstring;
   2042 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2043 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2044 	RF_Raid_t *raidPtr;
   2045 
   2046 	db1_printf(("Getting the disklabel...\n"));
   2047 
   2048 	memset(clp, 0, sizeof(*clp));
   2049 
   2050 	raidPtr = raidPtrs[unit];
   2051 
   2052 	raidgetdefaultlabel(raidPtr, rs, lp);
   2053 
   2054 	/*
   2055 	 * Call the generic disklabel extraction routine.
   2056 	 */
   2057 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2058 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2059 	if (errstring)
   2060 		raidmakedisklabel(rs);
   2061 	else {
   2062 		int     i;
   2063 		struct partition *pp;
   2064 
   2065 		/*
   2066 		 * Sanity check whether the found disklabel is valid.
   2067 		 *
   2068 		 * This is necessary since total size of the raid device
   2069 		 * may vary when an interleave is changed even though exactly
   2070 		 * same componets are used, and old disklabel may used
   2071 		 * if that is found.
   2072 		 */
   2073 		if (lp->d_secperunit != rs->sc_size)
   2074 			printf("WARNING: %s: "
   2075 			    "total sector size in disklabel (%d) != "
   2076 			    "the size of raid (%ld)\n", rs->sc_xname,
   2077 			    lp->d_secperunit, (long) rs->sc_size);
   2078 		for (i = 0; i < lp->d_npartitions; i++) {
   2079 			pp = &lp->d_partitions[i];
   2080 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2081 				printf("WARNING: %s: end of partition `%c' "
   2082 				    "exceeds the size of raid (%ld)\n",
   2083 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2084 		}
   2085 	}
   2086 
   2087 }
   2088 /*
   2089  * Take care of things one might want to take care of in the event
   2090  * that a disklabel isn't present.
   2091  */
   2092 static void
   2093 raidmakedisklabel(rs)
   2094 	struct raid_softc *rs;
   2095 {
   2096 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2097 	db1_printf(("Making a label..\n"));
   2098 
   2099 	/*
   2100 	 * For historical reasons, if there's no disklabel present
   2101 	 * the raw partition must be marked FS_BSDFFS.
   2102 	 */
   2103 
   2104 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2105 
   2106 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2107 
   2108 	lp->d_checksum = dkcksum(lp);
   2109 }
   2110 /*
   2111  * Lookup the provided name in the filesystem.  If the file exists,
   2112  * is a valid block device, and isn't being used by anyone else,
   2113  * set *vpp to the file's vnode.
   2114  * You'll find the original of this in ccd.c
   2115  */
   2116 int
   2117 raidlookup(path, p, vpp)
   2118 	char   *path;
   2119 	struct proc *p;
   2120 	struct vnode **vpp;	/* result */
   2121 {
   2122 	struct nameidata nd;
   2123 	struct vnode *vp;
   2124 	struct vattr va;
   2125 	int     error;
   2126 
   2127 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2128 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2129 #ifdef DEBUG
   2130 		printf("RAIDframe: vn_open returned %d\n", error);
   2131 #endif
   2132 		return (error);
   2133 	}
   2134 	vp = nd.ni_vp;
   2135 	if (vp->v_usecount > 1) {
   2136 		VOP_UNLOCK(vp, 0);
   2137 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2138 		return (EBUSY);
   2139 	}
   2140 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2141 		VOP_UNLOCK(vp, 0);
   2142 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2143 		return (error);
   2144 	}
   2145 	/* XXX: eventually we should handle VREG, too. */
   2146 	if (va.va_type != VBLK) {
   2147 		VOP_UNLOCK(vp, 0);
   2148 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2149 		return (ENOTBLK);
   2150 	}
   2151 	VOP_UNLOCK(vp, 0);
   2152 	*vpp = vp;
   2153 	return (0);
   2154 }
   2155 /*
   2156  * Wait interruptibly for an exclusive lock.
   2157  *
   2158  * XXX
   2159  * Several drivers do this; it should be abstracted and made MP-safe.
   2160  * (Hmm... where have we seen this warning before :->  GO )
   2161  */
   2162 static int
   2163 raidlock(rs)
   2164 	struct raid_softc *rs;
   2165 {
   2166 	int     error;
   2167 
   2168 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2169 		rs->sc_flags |= RAIDF_WANTED;
   2170 		if ((error =
   2171 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2172 			return (error);
   2173 	}
   2174 	rs->sc_flags |= RAIDF_LOCKED;
   2175 	return (0);
   2176 }
   2177 /*
   2178  * Unlock and wake up any waiters.
   2179  */
   2180 static void
   2181 raidunlock(rs)
   2182 	struct raid_softc *rs;
   2183 {
   2184 
   2185 	rs->sc_flags &= ~RAIDF_LOCKED;
   2186 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2187 		rs->sc_flags &= ~RAIDF_WANTED;
   2188 		wakeup(rs);
   2189 	}
   2190 }
   2191 
   2192 
   2193 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2194 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2195 
   2196 int
   2197 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2198 {
   2199 	RF_ComponentLabel_t clabel;
   2200 	raidread_component_label(dev, b_vp, &clabel);
   2201 	clabel.mod_counter = mod_counter;
   2202 	clabel.clean = RF_RAID_CLEAN;
   2203 	raidwrite_component_label(dev, b_vp, &clabel);
   2204 	return(0);
   2205 }
   2206 
   2207 
   2208 int
   2209 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2210 {
   2211 	RF_ComponentLabel_t clabel;
   2212 	raidread_component_label(dev, b_vp, &clabel);
   2213 	clabel.mod_counter = mod_counter;
   2214 	clabel.clean = RF_RAID_DIRTY;
   2215 	raidwrite_component_label(dev, b_vp, &clabel);
   2216 	return(0);
   2217 }
   2218 
   2219 /* ARGSUSED */
   2220 int
   2221 raidread_component_label(dev, b_vp, clabel)
   2222 	dev_t dev;
   2223 	struct vnode *b_vp;
   2224 	RF_ComponentLabel_t *clabel;
   2225 {
   2226 	struct buf *bp;
   2227 	const struct bdevsw *bdev;
   2228 	int error;
   2229 
   2230 	/* XXX should probably ensure that we don't try to do this if
   2231 	   someone has changed rf_protected_sectors. */
   2232 
   2233 	if (b_vp == NULL) {
   2234 		/* For whatever reason, this component is not valid.
   2235 		   Don't try to read a component label from it. */
   2236 		return(EINVAL);
   2237 	}
   2238 
   2239 	/* get a block of the appropriate size... */
   2240 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2241 	bp->b_dev = dev;
   2242 
   2243 	/* get our ducks in a row for the read */
   2244 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2245 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2246 	bp->b_flags |= B_READ;
   2247  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2248 
   2249 	bdev = bdevsw_lookup(bp->b_dev);
   2250 	if (bdev == NULL)
   2251 		return (ENXIO);
   2252 	(*bdev->d_strategy)(bp);
   2253 
   2254 	error = biowait(bp);
   2255 
   2256 	if (!error) {
   2257 		memcpy(clabel, bp->b_data,
   2258 		       sizeof(RF_ComponentLabel_t));
   2259 #if 0
   2260 		rf_print_component_label( clabel );
   2261 #endif
   2262         } else {
   2263 #if 0
   2264 		printf("Failed to read RAID component label!\n");
   2265 #endif
   2266 	}
   2267 
   2268 	brelse(bp);
   2269 	return(error);
   2270 }
   2271 /* ARGSUSED */
   2272 int
   2273 raidwrite_component_label(dev, b_vp, clabel)
   2274 	dev_t dev;
   2275 	struct vnode *b_vp;
   2276 	RF_ComponentLabel_t *clabel;
   2277 {
   2278 	struct buf *bp;
   2279 	const struct bdevsw *bdev;
   2280 	int error;
   2281 
   2282 	/* get a block of the appropriate size... */
   2283 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2284 	bp->b_dev = dev;
   2285 
   2286 	/* get our ducks in a row for the write */
   2287 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2288 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2289 	bp->b_flags |= B_WRITE;
   2290  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2291 
   2292 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2293 
   2294 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2295 
   2296 	bdev = bdevsw_lookup(bp->b_dev);
   2297 	if (bdev == NULL)
   2298 		return (ENXIO);
   2299 	(*bdev->d_strategy)(bp);
   2300 	error = biowait(bp);
   2301 	brelse(bp);
   2302 	if (error) {
   2303 #if 1
   2304 		printf("Failed to write RAID component info!\n");
   2305 #endif
   2306 	}
   2307 
   2308 	return(error);
   2309 }
   2310 
   2311 void
   2312 rf_markalldirty(raidPtr)
   2313 	RF_Raid_t *raidPtr;
   2314 {
   2315 	RF_ComponentLabel_t clabel;
   2316 	int r,c;
   2317 
   2318 	raidPtr->mod_counter++;
   2319 	for (r = 0; r < raidPtr->numRow; r++) {
   2320 		for (c = 0; c < raidPtr->numCol; c++) {
   2321 			/* we don't want to touch (at all) a disk that has
   2322 			   failed */
   2323 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2324 				raidread_component_label(
   2325 					raidPtr->Disks[r][c].dev,
   2326 					raidPtr->raid_cinfo[r][c].ci_vp,
   2327 					&clabel);
   2328 				if (clabel.status == rf_ds_spared) {
   2329 					/* XXX do something special...
   2330 					 but whatever you do, don't
   2331 					 try to access it!! */
   2332 				} else {
   2333 #if 0
   2334 				clabel.status =
   2335 					raidPtr->Disks[r][c].status;
   2336 				raidwrite_component_label(
   2337 					raidPtr->Disks[r][c].dev,
   2338 					raidPtr->raid_cinfo[r][c].ci_vp,
   2339 					&clabel);
   2340 #endif
   2341 				raidmarkdirty(
   2342 				       raidPtr->Disks[r][c].dev,
   2343 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2344 				       raidPtr->mod_counter);
   2345 				}
   2346 			}
   2347 		}
   2348 	}
   2349 	/* printf("Component labels marked dirty.\n"); */
   2350 #if 0
   2351 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2352 		sparecol = raidPtr->numCol + c;
   2353 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2354 			/*
   2355 
   2356 			   XXX this is where we get fancy and map this spare
   2357 			   into it's correct spot in the array.
   2358 
   2359 			 */
   2360 			/*
   2361 
   2362 			   we claim this disk is "optimal" if it's
   2363 			   rf_ds_used_spare, as that means it should be
   2364 			   directly substitutable for the disk it replaced.
   2365 			   We note that too...
   2366 
   2367 			 */
   2368 
   2369 			for(i=0;i<raidPtr->numRow;i++) {
   2370 				for(j=0;j<raidPtr->numCol;j++) {
   2371 					if ((raidPtr->Disks[i][j].spareRow ==
   2372 					     r) &&
   2373 					    (raidPtr->Disks[i][j].spareCol ==
   2374 					     sparecol)) {
   2375 						srow = r;
   2376 						scol = sparecol;
   2377 						break;
   2378 					}
   2379 				}
   2380 			}
   2381 
   2382 			raidread_component_label(
   2383 				      raidPtr->Disks[r][sparecol].dev,
   2384 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2385 				      &clabel);
   2386 			/* make sure status is noted */
   2387 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2388 			clabel.mod_counter = raidPtr->mod_counter;
   2389 			clabel.serial_number = raidPtr->serial_number;
   2390 			clabel.row = srow;
   2391 			clabel.column = scol;
   2392 			clabel.num_rows = raidPtr->numRow;
   2393 			clabel.num_columns = raidPtr->numCol;
   2394 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2395 			clabel.status = rf_ds_optimal;
   2396 			raidwrite_component_label(
   2397 				      raidPtr->Disks[r][sparecol].dev,
   2398 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2399 				      &clabel);
   2400 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2401 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2402 		}
   2403 	}
   2404 
   2405 #endif
   2406 }
   2407 
   2408 
   2409 void
   2410 rf_update_component_labels(raidPtr, final)
   2411 	RF_Raid_t *raidPtr;
   2412 	int final;
   2413 {
   2414 	RF_ComponentLabel_t clabel;
   2415 	int sparecol;
   2416 	int r,c;
   2417 	int i,j;
   2418 	int srow, scol;
   2419 
   2420 	srow = -1;
   2421 	scol = -1;
   2422 
   2423 	/* XXX should do extra checks to make sure things really are clean,
   2424 	   rather than blindly setting the clean bit... */
   2425 
   2426 	raidPtr->mod_counter++;
   2427 
   2428 	for (r = 0; r < raidPtr->numRow; r++) {
   2429 		for (c = 0; c < raidPtr->numCol; c++) {
   2430 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2431 				raidread_component_label(
   2432 					raidPtr->Disks[r][c].dev,
   2433 					raidPtr->raid_cinfo[r][c].ci_vp,
   2434 					&clabel);
   2435 				/* make sure status is noted */
   2436 				clabel.status = rf_ds_optimal;
   2437 				/* bump the counter */
   2438 				clabel.mod_counter = raidPtr->mod_counter;
   2439 
   2440 				raidwrite_component_label(
   2441 					raidPtr->Disks[r][c].dev,
   2442 					raidPtr->raid_cinfo[r][c].ci_vp,
   2443 					&clabel);
   2444 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2445 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2446 						raidmarkclean(
   2447 							      raidPtr->Disks[r][c].dev,
   2448 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2449 							      raidPtr->mod_counter);
   2450 					}
   2451 				}
   2452 			}
   2453 			/* else we don't touch it.. */
   2454 		}
   2455 	}
   2456 
   2457 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2458 		sparecol = raidPtr->numCol + c;
   2459 		/* Need to ensure that the reconstruct actually completed! */
   2460 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2461 			/*
   2462 
   2463 			   we claim this disk is "optimal" if it's
   2464 			   rf_ds_used_spare, as that means it should be
   2465 			   directly substitutable for the disk it replaced.
   2466 			   We note that too...
   2467 
   2468 			 */
   2469 
   2470 			for(i=0;i<raidPtr->numRow;i++) {
   2471 				for(j=0;j<raidPtr->numCol;j++) {
   2472 					if ((raidPtr->Disks[i][j].spareRow ==
   2473 					     0) &&
   2474 					    (raidPtr->Disks[i][j].spareCol ==
   2475 					     sparecol)) {
   2476 						srow = i;
   2477 						scol = j;
   2478 						break;
   2479 					}
   2480 				}
   2481 			}
   2482 
   2483 			/* XXX shouldn't *really* need this... */
   2484 			raidread_component_label(
   2485 				      raidPtr->Disks[0][sparecol].dev,
   2486 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2487 				      &clabel);
   2488 			/* make sure status is noted */
   2489 
   2490 			raid_init_component_label(raidPtr, &clabel);
   2491 
   2492 			clabel.mod_counter = raidPtr->mod_counter;
   2493 			clabel.row = srow;
   2494 			clabel.column = scol;
   2495 			clabel.status = rf_ds_optimal;
   2496 
   2497 			raidwrite_component_label(
   2498 				      raidPtr->Disks[0][sparecol].dev,
   2499 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2500 				      &clabel);
   2501 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2502 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2503 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2504 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2505 						       raidPtr->mod_counter);
   2506 				}
   2507 			}
   2508 		}
   2509 	}
   2510 	/* 	printf("Component labels updated\n"); */
   2511 }
   2512 
   2513 void
   2514 rf_close_component(raidPtr, vp, auto_configured)
   2515 	RF_Raid_t *raidPtr;
   2516 	struct vnode *vp;
   2517 	int auto_configured;
   2518 {
   2519 	struct proc *p;
   2520 
   2521 	p = raidPtr->engine_thread;
   2522 
   2523 	if (vp != NULL) {
   2524 		if (auto_configured == 1) {
   2525 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2526 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2527 			vput(vp);
   2528 
   2529 		} else {
   2530 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2531 		}
   2532 	} else {
   2533 		printf("vnode was NULL\n");
   2534 	}
   2535 }
   2536 
   2537 
   2538 void
   2539 rf_UnconfigureVnodes(raidPtr)
   2540 	RF_Raid_t *raidPtr;
   2541 {
   2542 	int r,c;
   2543 	struct proc *p;
   2544 	struct vnode *vp;
   2545 	int acd;
   2546 
   2547 
   2548 	/* We take this opportunity to close the vnodes like we should.. */
   2549 
   2550 	p = raidPtr->engine_thread;
   2551 
   2552 	for (r = 0; r < raidPtr->numRow; r++) {
   2553 		for (c = 0; c < raidPtr->numCol; c++) {
   2554 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2555 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2556 			acd = raidPtr->Disks[r][c].auto_configured;
   2557 			rf_close_component(raidPtr, vp, acd);
   2558 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2559 			raidPtr->Disks[r][c].auto_configured = 0;
   2560 		}
   2561 	}
   2562 	for (r = 0; r < raidPtr->numSpare; r++) {
   2563 		printf("Closing vnode for spare: %d\n", r);
   2564 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2565 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2566 		rf_close_component(raidPtr, vp, acd);
   2567 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2568 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2569 	}
   2570 }
   2571 
   2572 
   2573 void
   2574 rf_ReconThread(req)
   2575 	struct rf_recon_req *req;
   2576 {
   2577 	int     s;
   2578 	RF_Raid_t *raidPtr;
   2579 
   2580 	s = splbio();
   2581 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2582 	raidPtr->recon_in_progress = 1;
   2583 
   2584 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2585 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2586 
   2587 	/* XXX get rid of this! we don't need it at all.. */
   2588 	RF_Free(req, sizeof(*req));
   2589 
   2590 	raidPtr->recon_in_progress = 0;
   2591 	splx(s);
   2592 
   2593 	/* That's all... */
   2594 	kthread_exit(0);        /* does not return */
   2595 }
   2596 
   2597 void
   2598 rf_RewriteParityThread(raidPtr)
   2599 	RF_Raid_t *raidPtr;
   2600 {
   2601 	int retcode;
   2602 	int s;
   2603 
   2604 	raidPtr->parity_rewrite_in_progress = 1;
   2605 	s = splbio();
   2606 	retcode = rf_RewriteParity(raidPtr);
   2607 	splx(s);
   2608 	if (retcode) {
   2609 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2610 	} else {
   2611 		/* set the clean bit!  If we shutdown correctly,
   2612 		   the clean bit on each component label will get
   2613 		   set */
   2614 		raidPtr->parity_good = RF_RAID_CLEAN;
   2615 	}
   2616 	raidPtr->parity_rewrite_in_progress = 0;
   2617 
   2618 	/* Anyone waiting for us to stop?  If so, inform them... */
   2619 	if (raidPtr->waitShutdown) {
   2620 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2621 	}
   2622 
   2623 	/* That's all... */
   2624 	kthread_exit(0);        /* does not return */
   2625 }
   2626 
   2627 
   2628 void
   2629 rf_CopybackThread(raidPtr)
   2630 	RF_Raid_t *raidPtr;
   2631 {
   2632 	int s;
   2633 
   2634 	raidPtr->copyback_in_progress = 1;
   2635 	s = splbio();
   2636 	rf_CopybackReconstructedData(raidPtr);
   2637 	splx(s);
   2638 	raidPtr->copyback_in_progress = 0;
   2639 
   2640 	/* That's all... */
   2641 	kthread_exit(0);        /* does not return */
   2642 }
   2643 
   2644 
   2645 void
   2646 rf_ReconstructInPlaceThread(req)
   2647 	struct rf_recon_req *req;
   2648 {
   2649 	int retcode;
   2650 	int s;
   2651 	RF_Raid_t *raidPtr;
   2652 
   2653 	s = splbio();
   2654 	raidPtr = req->raidPtr;
   2655 	raidPtr->recon_in_progress = 1;
   2656 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2657 	RF_Free(req, sizeof(*req));
   2658 	raidPtr->recon_in_progress = 0;
   2659 	splx(s);
   2660 
   2661 	/* That's all... */
   2662 	kthread_exit(0);        /* does not return */
   2663 }
   2664 
/*
 * Hook taking a device pointer; judging by the name it is meant to run
 * around mounting of the root file system.  It intentionally does nothing.
 */
void
rf_mountroot_hook(struct device *dev)
{
	/* nothing to do */
}
   2671 
   2672 
   2673 RF_AutoConfig_t *
   2674 rf_find_raid_components()
   2675 {
   2676 	struct vnode *vp;
   2677 	struct disklabel label;
   2678 	struct device *dv;
   2679 	dev_t dev;
   2680 	int bmajor;
   2681 	int error;
   2682 	int i;
   2683 	int good_one;
   2684 	RF_ComponentLabel_t *clabel;
   2685 	RF_AutoConfig_t *ac_list;
   2686 	RF_AutoConfig_t *ac;
   2687 
   2688 
   2689 	/* initialize the AutoConfig list */
   2690 	ac_list = NULL;
   2691 
   2692 	/* we begin by trolling through *all* the devices on the system */
   2693 
   2694 	for (dv = alldevs.tqh_first; dv != NULL;
   2695 	     dv = dv->dv_list.tqe_next) {
   2696 
   2697 		/* we are only interested in disks... */
   2698 		if (dv->dv_class != DV_DISK)
   2699 			continue;
   2700 
   2701 		/* we don't care about floppies... */
   2702 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2703 			continue;
   2704 		}
   2705 		/* hdfd is the Atari/Hades floppy driver */
   2706 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2707 			continue;
   2708 		}
   2709 
   2710 		/* need to find the device_name_to_block_device_major stuff */
   2711 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2712 
   2713 		/* get a vnode for the raw partition of this disk */
   2714 
   2715 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2716 		if (bdevvp(dev, &vp))
   2717 			panic("RAID can't alloc vnode");
   2718 
   2719 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2720 
   2721 		if (error) {
   2722 			/* "Who cares."  Continue looking
   2723 			   for something that exists*/
   2724 			vput(vp);
   2725 			continue;
   2726 		}
   2727 
   2728 		/* Ok, the disk exists.  Go get the disklabel. */
   2729 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2730 				  FREAD, NOCRED, 0);
   2731 		if (error) {
   2732 			/*
   2733 			 * XXX can't happen - open() would
   2734 			 * have errored out (or faked up one)
   2735 			 */
   2736 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2737 			       dv->dv_xname, 'a' + RAW_PART, error);
   2738 		}
   2739 
   2740 		/* don't need this any more.  We'll allocate it again
   2741 		   a little later if we really do... */
   2742 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2743 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2744 		vput(vp);
   2745 
   2746 		for (i=0; i < label.d_npartitions; i++) {
   2747 			/* We only support partitions marked as RAID */
   2748 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2749 				continue;
   2750 
   2751 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2752 			if (bdevvp(dev, &vp))
   2753 				panic("RAID can't alloc vnode");
   2754 
   2755 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2756 			if (error) {
   2757 				/* Whatever... */
   2758 				vput(vp);
   2759 				continue;
   2760 			}
   2761 
   2762 			good_one = 0;
   2763 
   2764 			clabel = (RF_ComponentLabel_t *)
   2765 				malloc(sizeof(RF_ComponentLabel_t),
   2766 				       M_RAIDFRAME, M_NOWAIT);
   2767 			if (clabel == NULL) {
   2768 				/* XXX CLEANUP HERE */
   2769 				printf("RAID auto config: out of memory!\n");
   2770 				return(NULL); /* XXX probably should panic? */
   2771 			}
   2772 
   2773 			if (!raidread_component_label(dev, vp, clabel)) {
   2774 				/* Got the label.  Does it look reasonable? */
   2775 				if (rf_reasonable_label(clabel) &&
   2776 				    (clabel->partitionSize <=
   2777 				     label.d_partitions[i].p_size)) {
   2778 #if DEBUG
   2779 					printf("Component on: %s%c: %d\n",
   2780 					       dv->dv_xname, 'a'+i,
   2781 					       label.d_partitions[i].p_size);
   2782 					rf_print_component_label(clabel);
   2783 #endif
   2784 					/* if it's reasonable, add it,
   2785 					   else ignore it. */
   2786 					ac = (RF_AutoConfig_t *)
   2787 						malloc(sizeof(RF_AutoConfig_t),
   2788 						       M_RAIDFRAME,
   2789 						       M_NOWAIT);
   2790 					if (ac == NULL) {
   2791 						/* XXX should panic?? */
   2792 						return(NULL);
   2793 					}
   2794 
   2795 					sprintf(ac->devname, "%s%c",
   2796 						dv->dv_xname, 'a'+i);
   2797 					ac->dev = dev;
   2798 					ac->vp = vp;
   2799 					ac->clabel = clabel;
   2800 					ac->next = ac_list;
   2801 					ac_list = ac;
   2802 					good_one = 1;
   2803 				}
   2804 			}
   2805 			if (!good_one) {
   2806 				/* cleanup */
   2807 				free(clabel, M_RAIDFRAME);
   2808 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2809 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2810 				vput(vp);
   2811 			}
   2812 		}
   2813 	}
   2814 	return(ac_list);
   2815 }
   2816 
   2817 static int
   2818 rf_reasonable_label(clabel)
   2819 	RF_ComponentLabel_t *clabel;
   2820 {
   2821 
   2822 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2823 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2824 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2825 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2826 	    clabel->row >=0 &&
   2827 	    clabel->column >= 0 &&
   2828 	    clabel->num_rows > 0 &&
   2829 	    clabel->num_columns > 0 &&
   2830 	    clabel->row < clabel->num_rows &&
   2831 	    clabel->column < clabel->num_columns &&
   2832 	    clabel->blockSize > 0 &&
   2833 	    clabel->numBlocks > 0) {
   2834 		/* label looks reasonable enough... */
   2835 		return(1);
   2836 	}
   2837 	return(0);
   2838 }
   2839 
   2840 
   2841 void
   2842 rf_print_component_label(clabel)
   2843 	RF_ComponentLabel_t *clabel;
   2844 {
   2845 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2846 	       clabel->row, clabel->column,
   2847 	       clabel->num_rows, clabel->num_columns);
   2848 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2849 	       clabel->version, clabel->serial_number,
   2850 	       clabel->mod_counter);
   2851 	printf("   Clean: %s Status: %d\n",
   2852 	       clabel->clean ? "Yes" : "No", clabel->status );
   2853 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2854 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2855 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2856 	       (char) clabel->parityConfig, clabel->blockSize,
   2857 	       clabel->numBlocks);
   2858 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2859 	printf("   Contains root partition: %s\n",
   2860 	       clabel->root_partition ? "Yes" : "No" );
   2861 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2862 #if 0
   2863 	   printf("   Config order: %d\n", clabel->config_order);
   2864 #endif
   2865 
   2866 }
   2867 
   2868 RF_ConfigSet_t *
   2869 rf_create_auto_sets(ac_list)
   2870 	RF_AutoConfig_t *ac_list;
   2871 {
   2872 	RF_AutoConfig_t *ac;
   2873 	RF_ConfigSet_t *config_sets;
   2874 	RF_ConfigSet_t *cset;
   2875 	RF_AutoConfig_t *ac_next;
   2876 
   2877 
   2878 	config_sets = NULL;
   2879 
   2880 	/* Go through the AutoConfig list, and figure out which components
   2881 	   belong to what sets.  */
   2882 	ac = ac_list;
   2883 	while(ac!=NULL) {
   2884 		/* we're going to putz with ac->next, so save it here
   2885 		   for use at the end of the loop */
   2886 		ac_next = ac->next;
   2887 
   2888 		if (config_sets == NULL) {
   2889 			/* will need at least this one... */
   2890 			config_sets = (RF_ConfigSet_t *)
   2891 				malloc(sizeof(RF_ConfigSet_t),
   2892 				       M_RAIDFRAME, M_NOWAIT);
   2893 			if (config_sets == NULL) {
   2894 				panic("rf_create_auto_sets: No memory!\n");
   2895 			}
   2896 			/* this one is easy :) */
   2897 			config_sets->ac = ac;
   2898 			config_sets->next = NULL;
   2899 			config_sets->rootable = 0;
   2900 			ac->next = NULL;
   2901 		} else {
   2902 			/* which set does this component fit into? */
   2903 			cset = config_sets;
   2904 			while(cset!=NULL) {
   2905 				if (rf_does_it_fit(cset, ac)) {
   2906 					/* looks like it matches... */
   2907 					ac->next = cset->ac;
   2908 					cset->ac = ac;
   2909 					break;
   2910 				}
   2911 				cset = cset->next;
   2912 			}
   2913 			if (cset==NULL) {
   2914 				/* didn't find a match above... new set..*/
   2915 				cset = (RF_ConfigSet_t *)
   2916 					malloc(sizeof(RF_ConfigSet_t),
   2917 					       M_RAIDFRAME, M_NOWAIT);
   2918 				if (cset == NULL) {
   2919 					panic("rf_create_auto_sets: No memory!\n");
   2920 				}
   2921 				cset->ac = ac;
   2922 				ac->next = NULL;
   2923 				cset->next = config_sets;
   2924 				cset->rootable = 0;
   2925 				config_sets = cset;
   2926 			}
   2927 		}
   2928 		ac = ac_next;
   2929 	}
   2930 
   2931 
   2932 	return(config_sets);
   2933 }
   2934 
   2935 static int
   2936 rf_does_it_fit(cset, ac)
   2937 	RF_ConfigSet_t *cset;
   2938 	RF_AutoConfig_t *ac;
   2939 {
   2940 	RF_ComponentLabel_t *clabel1, *clabel2;
   2941 
   2942 	/* If this one matches the *first* one in the set, that's good
   2943 	   enough, since the other members of the set would have been
   2944 	   through here too... */
   2945 	/* note that we are not checking partitionSize here..
   2946 
   2947 	   Note that we are also not checking the mod_counters here.
   2948 	   If everything else matches execpt the mod_counter, that's
   2949 	   good enough for this test.  We will deal with the mod_counters
   2950 	   a little later in the autoconfiguration process.
   2951 
   2952 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2953 
   2954 	   The reason we don't check for this is that failed disks
   2955 	   will have lower modification counts.  If those disks are
   2956 	   not added to the set they used to belong to, then they will
   2957 	   form their own set, which may result in 2 different sets,
   2958 	   for example, competing to be configured at raid0, and
   2959 	   perhaps competing to be the root filesystem set.  If the
   2960 	   wrong ones get configured, or both attempt to become /,
   2961 	   weird behaviour and or serious lossage will occur.  Thus we
   2962 	   need to bring them into the fold here, and kick them out at
   2963 	   a later point.
   2964 
   2965 	*/
   2966 
   2967 	clabel1 = cset->ac->clabel;
   2968 	clabel2 = ac->clabel;
   2969 	if ((clabel1->version == clabel2->version) &&
   2970 	    (clabel1->serial_number == clabel2->serial_number) &&
   2971 	    (clabel1->num_rows == clabel2->num_rows) &&
   2972 	    (clabel1->num_columns == clabel2->num_columns) &&
   2973 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2974 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2975 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2976 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2977 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2978 	    (clabel1->blockSize == clabel2->blockSize) &&
   2979 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2980 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2981 	    (clabel1->root_partition == clabel2->root_partition) &&
   2982 	    (clabel1->last_unit == clabel2->last_unit) &&
   2983 	    (clabel1->config_order == clabel2->config_order)) {
   2984 		/* if it get's here, it almost *has* to be a match */
   2985 	} else {
   2986 		/* it's not consistent with somebody in the set..
   2987 		   punt */
   2988 		return(0);
   2989 	}
   2990 	/* all was fine.. it must fit... */
   2991 	return(1);
   2992 }
   2993 
   2994 int
   2995 rf_have_enough_components(cset)
   2996 	RF_ConfigSet_t *cset;
   2997 {
   2998 	RF_AutoConfig_t *ac;
   2999 	RF_AutoConfig_t *auto_config;
   3000 	RF_ComponentLabel_t *clabel;
   3001 	int r,c;
   3002 	int num_rows;
   3003 	int num_cols;
   3004 	int num_missing;
   3005 	int mod_counter;
   3006 	int mod_counter_found;
   3007 	int even_pair_failed;
   3008 	char parity_type;
   3009 
   3010 
   3011 	/* check to see that we have enough 'live' components
   3012 	   of this set.  If so, we can configure it if necessary */
   3013 
   3014 	num_rows = cset->ac->clabel->num_rows;
   3015 	num_cols = cset->ac->clabel->num_columns;
   3016 	parity_type = cset->ac->clabel->parityConfig;
   3017 
   3018 	/* XXX Check for duplicate components!?!?!? */
   3019 
   3020 	/* Determine what the mod_counter is supposed to be for this set. */
   3021 
   3022 	mod_counter_found = 0;
   3023 	mod_counter = 0;
   3024 	ac = cset->ac;
   3025 	while(ac!=NULL) {
   3026 		if (mod_counter_found==0) {
   3027 			mod_counter = ac->clabel->mod_counter;
   3028 			mod_counter_found = 1;
   3029 		} else {
   3030 			if (ac->clabel->mod_counter > mod_counter) {
   3031 				mod_counter = ac->clabel->mod_counter;
   3032 			}
   3033 		}
   3034 		ac = ac->next;
   3035 	}
   3036 
   3037 	num_missing = 0;
   3038 	auto_config = cset->ac;
   3039 
   3040 	for(r=0; r<num_rows; r++) {
   3041 		even_pair_failed = 0;
   3042 		for(c=0; c<num_cols; c++) {
   3043 			ac = auto_config;
   3044 			while(ac!=NULL) {
   3045 				if ((ac->clabel->row == r) &&
   3046 				    (ac->clabel->column == c) &&
   3047 				    (ac->clabel->mod_counter == mod_counter)) {
   3048 					/* it's this one... */
   3049 #if DEBUG
   3050 					printf("Found: %s at %d,%d\n",
   3051 					       ac->devname,r,c);
   3052 #endif
   3053 					break;
   3054 				}
   3055 				ac=ac->next;
   3056 			}
   3057 			if (ac==NULL) {
   3058 				/* Didn't find one here! */
   3059 				/* special case for RAID 1, especially
   3060 				   where there are more than 2
   3061 				   components (where RAIDframe treats
   3062 				   things a little differently :( ) */
   3063 				if (parity_type == '1') {
   3064 					if (c%2 == 0) { /* even component */
   3065 						even_pair_failed = 1;
   3066 					} else { /* odd component.  If
   3067                                                     we're failed, and
   3068                                                     so is the even
   3069                                                     component, it's
   3070                                                     "Good Night, Charlie" */
   3071 						if (even_pair_failed == 1) {
   3072 							return(0);
   3073 						}
   3074 					}
   3075 				} else {
   3076 					/* normal accounting */
   3077 					num_missing++;
   3078 				}
   3079 			}
   3080 			if ((parity_type == '1') && (c%2 == 1)) {
   3081 				/* Just did an even component, and we didn't
   3082 				   bail.. reset the even_pair_failed flag,
   3083 				   and go on to the next component.... */
   3084 				even_pair_failed = 0;
   3085 			}
   3086 		}
   3087 	}
   3088 
   3089 	clabel = cset->ac->clabel;
   3090 
   3091 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3092 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3093 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3094 		/* XXX this needs to be made *much* more general */
   3095 		/* Too many failures */
   3096 		return(0);
   3097 	}
   3098 	/* otherwise, all is well, and we've got enough to take a kick
   3099 	   at autoconfiguring this set */
   3100 	return(1);
   3101 }
   3102 
   3103 void
   3104 rf_create_configuration(ac,config,raidPtr)
   3105 	RF_AutoConfig_t *ac;
   3106 	RF_Config_t *config;
   3107 	RF_Raid_t *raidPtr;
   3108 {
   3109 	RF_ComponentLabel_t *clabel;
   3110 	int i;
   3111 
   3112 	clabel = ac->clabel;
   3113 
   3114 	/* 1. Fill in the common stuff */
   3115 	config->numRow = clabel->num_rows;
   3116 	config->numCol = clabel->num_columns;
   3117 	config->numSpare = 0; /* XXX should this be set here? */
   3118 	config->sectPerSU = clabel->sectPerSU;
   3119 	config->SUsPerPU = clabel->SUsPerPU;
   3120 	config->SUsPerRU = clabel->SUsPerRU;
   3121 	config->parityConfig = clabel->parityConfig;
   3122 	/* XXX... */
   3123 	strcpy(config->diskQueueType,"fifo");
   3124 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3125 	config->layoutSpecificSize = 0; /* XXX ?? */
   3126 
   3127 	while(ac!=NULL) {
   3128 		/* row/col values will be in range due to the checks
   3129 		   in reasonable_label() */
   3130 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3131 		       ac->devname);
   3132 		ac = ac->next;
   3133 	}
   3134 
   3135 	for(i=0;i<RF_MAXDBGV;i++) {
   3136 		config->debugVars[i][0] = NULL;
   3137 	}
   3138 }
   3139 
   3140 int
   3141 rf_set_autoconfig(raidPtr, new_value)
   3142 	RF_Raid_t *raidPtr;
   3143 	int new_value;
   3144 {
   3145 	RF_ComponentLabel_t clabel;
   3146 	struct vnode *vp;
   3147 	dev_t dev;
   3148 	int row, column;
   3149 
   3150 	raidPtr->autoconfigure = new_value;
   3151 	for(row=0; row<raidPtr->numRow; row++) {
   3152 		for(column=0; column<raidPtr->numCol; column++) {
   3153 			if (raidPtr->Disks[row][column].status ==
   3154 			    rf_ds_optimal) {
   3155 				dev = raidPtr->Disks[row][column].dev;
   3156 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3157 				raidread_component_label(dev, vp, &clabel);
   3158 				clabel.autoconfigure = new_value;
   3159 				raidwrite_component_label(dev, vp, &clabel);
   3160 			}
   3161 		}
   3162 	}
   3163 	return(new_value);
   3164 }
   3165 
   3166 int
   3167 rf_set_rootpartition(raidPtr, new_value)
   3168 	RF_Raid_t *raidPtr;
   3169 	int new_value;
   3170 {
   3171 	RF_ComponentLabel_t clabel;
   3172 	struct vnode *vp;
   3173 	dev_t dev;
   3174 	int row, column;
   3175 
   3176 	raidPtr->root_partition = new_value;
   3177 	for(row=0; row<raidPtr->numRow; row++) {
   3178 		for(column=0; column<raidPtr->numCol; column++) {
   3179 			if (raidPtr->Disks[row][column].status ==
   3180 			    rf_ds_optimal) {
   3181 				dev = raidPtr->Disks[row][column].dev;
   3182 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3183 				raidread_component_label(dev, vp, &clabel);
   3184 				clabel.root_partition = new_value;
   3185 				raidwrite_component_label(dev, vp, &clabel);
   3186 			}
   3187 		}
   3188 	}
   3189 	return(new_value);
   3190 }
   3191 
   3192 void
   3193 rf_release_all_vps(cset)
   3194 	RF_ConfigSet_t *cset;
   3195 {
   3196 	RF_AutoConfig_t *ac;
   3197 
   3198 	ac = cset->ac;
   3199 	while(ac!=NULL) {
   3200 		/* Close the vp, and give it back */
   3201 		if (ac->vp) {
   3202 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3203 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3204 			vput(ac->vp);
   3205 			ac->vp = NULL;
   3206 		}
   3207 		ac = ac->next;
   3208 	}
   3209 }
   3210 
   3211 
   3212 void
   3213 rf_cleanup_config_set(cset)
   3214 	RF_ConfigSet_t *cset;
   3215 {
   3216 	RF_AutoConfig_t *ac;
   3217 	RF_AutoConfig_t *next_ac;
   3218 
   3219 	ac = cset->ac;
   3220 	while(ac!=NULL) {
   3221 		next_ac = ac->next;
   3222 		/* nuke the label */
   3223 		free(ac->clabel, M_RAIDFRAME);
   3224 		/* cleanup the config structure */
   3225 		free(ac, M_RAIDFRAME);
   3226 		/* "next.." */
   3227 		ac = next_ac;
   3228 	}
   3229 	/* and, finally, nuke the config set */
   3230 	free(cset, M_RAIDFRAME);
   3231 }
   3232 
   3233 
   3234 void
   3235 raid_init_component_label(raidPtr, clabel)
   3236 	RF_Raid_t *raidPtr;
   3237 	RF_ComponentLabel_t *clabel;
   3238 {
   3239 	/* current version number */
   3240 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3241 	clabel->serial_number = raidPtr->serial_number;
   3242 	clabel->mod_counter = raidPtr->mod_counter;
   3243 	clabel->num_rows = raidPtr->numRow;
   3244 	clabel->num_columns = raidPtr->numCol;
   3245 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3246 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3247 
   3248 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3249 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3250 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3251 
   3252 	clabel->blockSize = raidPtr->bytesPerSector;
   3253 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3254 
   3255 	/* XXX not portable */
   3256 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3257 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3258 	clabel->autoconfigure = raidPtr->autoconfigure;
   3259 	clabel->root_partition = raidPtr->root_partition;
   3260 	clabel->last_unit = raidPtr->raidid;
   3261 	clabel->config_order = raidPtr->config_order;
   3262 }
   3263 
   3264 int
   3265 rf_auto_config_set(cset,unit)
   3266 	RF_ConfigSet_t *cset;
   3267 	int *unit;
   3268 {
   3269 	RF_Raid_t *raidPtr;
   3270 	RF_Config_t *config;
   3271 	int raidID;
   3272 	int retcode;
   3273 
   3274 	printf("RAID autoconfigure\n");
   3275 
   3276 	retcode = 0;
   3277 	*unit = -1;
   3278 
   3279 	/* 1. Create a config structure */
   3280 
   3281 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3282 				       M_RAIDFRAME,
   3283 				       M_NOWAIT);
   3284 	if (config==NULL) {
   3285 		printf("Out of mem!?!?\n");
   3286 				/* XXX do something more intelligent here. */
   3287 		return(1);
   3288 	}
   3289 
   3290 	memset(config, 0, sizeof(RF_Config_t));
   3291 
   3292 	/* XXX raidID needs to be set correctly.. */
   3293 
   3294 	/*
   3295 	   2. Figure out what RAID ID this one is supposed to live at
   3296 	   See if we can get the same RAID dev that it was configured
   3297 	   on last time..
   3298 	*/
   3299 
   3300 	raidID = cset->ac->clabel->last_unit;
   3301 	if ((raidID < 0) || (raidID >= numraid)) {
   3302 		/* let's not wander off into lala land. */
   3303 		raidID = numraid - 1;
   3304 	}
   3305 	if (raidPtrs[raidID]->valid != 0) {
   3306 
   3307 		/*
   3308 		   Nope... Go looking for an alternative...
   3309 		   Start high so we don't immediately use raid0 if that's
   3310 		   not taken.
   3311 		*/
   3312 
   3313 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3314 			if (raidPtrs[raidID]->valid == 0) {
   3315 				/* can use this one! */
   3316 				break;
   3317 			}
   3318 		}
   3319 	}
   3320 
   3321 	if (raidID < 0) {
   3322 		/* punt... */
   3323 		printf("Unable to auto configure this set!\n");
   3324 		printf("(Out of RAID devs!)\n");
   3325 		return(1);
   3326 	}
   3327 	printf("Configuring raid%d:\n",raidID);
   3328 	raidPtr = raidPtrs[raidID];
   3329 
   3330 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3331 	raidPtr->raidid = raidID;
   3332 	raidPtr->openings = RAIDOUTSTANDING;
   3333 
   3334 	/* 3. Build the configuration structure */
   3335 	rf_create_configuration(cset->ac, config, raidPtr);
   3336 
   3337 	/* 4. Do the configuration */
   3338 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3339 
   3340 	if (retcode == 0) {
   3341 
   3342 		raidinit(raidPtrs[raidID]);
   3343 
   3344 		rf_markalldirty(raidPtrs[raidID]);
   3345 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3346 		if (cset->ac->clabel->root_partition==1) {
   3347 			/* everything configured just fine.  Make a note
   3348 			   that this set is eligible to be root. */
   3349 			cset->rootable = 1;
   3350 			/* XXX do this here? */
   3351 			raidPtrs[raidID]->root_partition = 1;
   3352 		}
   3353 	}
   3354 
   3355 	/* 5. Cleanup */
   3356 	free(config, M_RAIDFRAME);
   3357 
   3358 	*unit = raidID;
   3359 	return(retcode);
   3360 }
   3361 
   3362 void
   3363 rf_disk_unbusy(desc)
   3364 	RF_RaidAccessDesc_t *desc;
   3365 {
   3366 	struct buf *bp;
   3367 
   3368 	bp = (struct buf *)desc->bp;
   3369 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3370 			    (bp->b_bcount - bp->b_resid));
   3371 }
   3372