Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.135
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.135 2002/09/22 03:46:40 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.135 2002/09/22 03:46:40 oster Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_threadstuff.h"
    155 
/*
 * Debug tracing.  db1_printf() takes one fully parenthesized printf
 * argument list, e.g. db1_printf(("unit %d\n", unit)); and produces output
 * only when built with DEBUG and rf_kdebug_level is greater than zero.
 *
 * Both variants expand to a single do { } while (0) statement so the macro
 * can be used as the unbraced body of an if/else without capturing a
 * following `else' or leaving a stray empty statement behind.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
    163 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    164 
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    166 
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    170 						 * installation process */
    171 
    172 /* prototypes */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static void raidinit(RF_Raid_t *);
    180 
    181 void raidattach(int);
    182 
    183 dev_type_open(raidopen);
    184 dev_type_close(raidclose);
    185 dev_type_read(raidread);
    186 dev_type_write(raidwrite);
    187 dev_type_ioctl(raidioctl);
    188 dev_type_strategy(raidstrategy);
    189 dev_type_dump(raiddump);
    190 dev_type_size(raidsize);
    191 
    192 const struct bdevsw raid_bdevsw = {
    193 	raidopen, raidclose, raidstrategy, raidioctl,
    194 	raiddump, raidsize, D_DISK
    195 };
    196 
    197 const struct cdevsw raid_cdevsw = {
    198 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    199 	nostop, notty, nopoll, nommap, D_DISK
    200 };
    201 
    202 /*
    203  * Pilfered from ccd.c
    204  */
    205 
    206 struct raidbuf {
    207 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    208 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    209 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    210 };
    211 
    212 /* component buffer pool */
    213 struct pool raidframe_cbufpool;
    214 
    215 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    216 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    217 
    218 /* XXX Not sure if the following should be replacing the raidPtrs above,
    219    or if it should be used in conjunction with that...
    220 */
    221 
    222 struct raid_softc {
    223 	int     sc_flags;	/* flags */
    224 	int     sc_cflags;	/* configuration flags */
    225 	size_t  sc_size;        /* size of the raid device */
    226 	char    sc_xname[20];	/* XXX external name */
    227 	struct disk sc_dkdev;	/* generic disk device info */
    228 	struct bufq_state buf_queue;	/* used for the device queue */
    229 };
    230 /* sc_flags */
    231 #define RAIDF_INITED	0x01	/* unit has been initialized */
    232 #define RAIDF_WLABEL	0x02	/* label area is writable */
    233 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    234 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    235 #define RAIDF_LOCKED	0x80	/* unit is locked */
    236 
    237 #define	raidunit(x)	DISKUNIT(x)
    238 int numraid = 0;
    239 
    240 /*
    241  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    242  * Be aware that large numbers can allow the driver to consume a lot of
    243  * kernel memory, especially on writes, and in degraded mode reads.
    244  *
    245  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    246  * a single 64K write will typically require 64K for the old data,
    247  * 64K for the old parity, and 64K for the new parity, for a total
    248  * of 192K (if the parity buffer is not re-used immediately).
    249  * Even if it is used immediately, that's still 128K, which when multiplied
    250  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    251  *
    252  * Now in degraded mode, for example, a 64K read on the above setup may
    253  * require data reconstruction, which will require *all* of the 4 remaining
    254  * disks to participate -- 4 * 32K/disk == 128K again.
    255  */
    256 
/* Default number of simultaneous I/Os per RAID device; may be overridden. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition on the same major/unit as `dev'. */
#define RAIDLABELDEV(dev)						\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    263 
    264 /* declared here, and made public, for the benefit of KVM stuff.. */
    265 struct raid_softc *raid_softc;
    266 
    267 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    268 				     struct disklabel *);
    269 static void raidgetdisklabel(dev_t);
    270 static void raidmakedisklabel(struct raid_softc *);
    271 
    272 static int raidlock(struct raid_softc *);
    273 static void raidunlock(struct raid_softc *);
    274 
    275 static void rf_markalldirty(RF_Raid_t *);
    276 
    277 struct device *raidrootdev;
    278 
    279 void rf_ReconThread(struct rf_recon_req *);
    280 /* XXX what I want is: */
    281 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    282 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    283 void rf_CopybackThread(RF_Raid_t *raidPtr);
    284 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    285 void rf_buildroothack(void *);
    286 
    287 RF_AutoConfig_t *rf_find_raid_components(void);
    288 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    289 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    290 static int rf_reasonable_label(RF_ComponentLabel_t *);
    291 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    292 int rf_set_autoconfig(RF_Raid_t *, int);
    293 int rf_set_rootpartition(RF_Raid_t *, int);
    294 void rf_release_all_vps(RF_ConfigSet_t *);
    295 void rf_cleanup_config_set(RF_ConfigSet_t *);
    296 int rf_have_enough_components(RF_ConfigSet_t *);
    297 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    298 
    299 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    300 				  allow autoconfig to take place.
    301 			          Note that this is overridden by having
    302 			          RAID_AUTOCONFIG as an option in the
    303 			          kernel config file.  */
    304 
    305 void
    306 raidattach(num)
    307 	int     num;
    308 {
    309 	int raidID;
    310 	int i, rc;
    311 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    312 	RF_ConfigSet_t *config_sets;
    313 
    314 #ifdef DEBUG
    315 	printf("raidattach: Asked for %d units\n", num);
    316 #endif
    317 
    318 	if (num <= 0) {
    319 #ifdef DIAGNOSTIC
    320 		panic("raidattach: count <= 0");
    321 #endif
    322 		return;
    323 	}
    324 	/* This is where all the initialization stuff gets done. */
    325 
    326 	numraid = num;
    327 
    328 	/* Make some space for requested number of units... */
    329 
    330 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    331 	if (raidPtrs == NULL) {
    332 		panic("raidPtrs is NULL!!\n");
    333 	}
    334 
    335 	/* Initialize the component buffer pool. */
    336 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    337 	    0, 0, "raidpl", NULL);
    338 
    339 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    340 	if (rc) {
    341 		RF_PANIC();
    342 	}
    343 
    344 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    345 
    346 	for (i = 0; i < num; i++)
    347 		raidPtrs[i] = NULL;
    348 	rc = rf_BootRaidframe();
    349 	if (rc == 0)
    350 		printf("Kernelized RAIDframe activated\n");
    351 	else
    352 		panic("Serious error booting RAID!!\n");
    353 
    354 	/* put together some datastructures like the CCD device does.. This
    355 	 * lets us lock the device and what-not when it gets opened. */
    356 
    357 	raid_softc = (struct raid_softc *)
    358 		malloc(num * sizeof(struct raid_softc),
    359 		       M_RAIDFRAME, M_NOWAIT);
    360 	if (raid_softc == NULL) {
    361 		printf("WARNING: no memory for RAIDframe driver\n");
    362 		return;
    363 	}
    364 
    365 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    366 
    367 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    368 					      M_RAIDFRAME, M_NOWAIT);
    369 	if (raidrootdev == NULL) {
    370 		panic("No memory for RAIDframe driver!!?!?!\n");
    371 	}
    372 
    373 	for (raidID = 0; raidID < num; raidID++) {
    374 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    375 
    376 		raidrootdev[raidID].dv_class  = DV_DISK;
    377 		raidrootdev[raidID].dv_cfdata = NULL;
    378 		raidrootdev[raidID].dv_unit   = raidID;
    379 		raidrootdev[raidID].dv_parent = NULL;
    380 		raidrootdev[raidID].dv_flags  = 0;
    381 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    382 
    383 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    384 			  (RF_Raid_t *));
    385 		if (raidPtrs[raidID] == NULL) {
    386 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    387 			numraid = raidID;
    388 			return;
    389 		}
    390 	}
    391 
    392 #ifdef RAID_AUTOCONFIG
    393 	raidautoconfig = 1;
    394 #endif
    395 
    396 if (raidautoconfig) {
    397 	/* 1. locate all RAID components on the system */
    398 
    399 #if DEBUG
    400 	printf("Searching for raid components...\n");
    401 #endif
    402 	ac_list = rf_find_raid_components();
    403 
    404 	/* 2. sort them into their respective sets */
    405 
    406 	config_sets = rf_create_auto_sets(ac_list);
    407 
    408 	/* 3. evaluate each set and configure the valid ones
    409 	   This gets done in rf_buildroothack() */
    410 
    411 	/* schedule the creation of the thread to do the
    412 	   "/ on RAID" stuff */
    413 
    414 	kthread_create(rf_buildroothack,config_sets);
    415 
    416 }
    417 
    418 }
    419 
    420 void
    421 rf_buildroothack(arg)
    422 	void *arg;
    423 {
    424 	RF_ConfigSet_t *config_sets = arg;
    425 	RF_ConfigSet_t *cset;
    426 	RF_ConfigSet_t *next_cset;
    427 	int retcode;
    428 	int raidID;
    429 	int rootID;
    430 	int num_root;
    431 
    432 	rootID = 0;
    433 	num_root = 0;
    434 	cset = config_sets;
    435 	while(cset != NULL ) {
    436 		next_cset = cset->next;
    437 		if (rf_have_enough_components(cset) &&
    438 		    cset->ac->clabel->autoconfigure==1) {
    439 			retcode = rf_auto_config_set(cset,&raidID);
    440 			if (!retcode) {
    441 				if (cset->rootable) {
    442 					rootID = raidID;
    443 					num_root++;
    444 				}
    445 			} else {
    446 				/* The autoconfig didn't work :( */
    447 #if DEBUG
    448 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    449 #endif
    450 				rf_release_all_vps(cset);
    451 			}
    452 		} else {
    453 			/* we're not autoconfiguring this set...
    454 			   release the associated resources */
    455 			rf_release_all_vps(cset);
    456 		}
    457 		/* cleanup */
    458 		rf_cleanup_config_set(cset);
    459 		cset = next_cset;
    460 	}
    461 
    462 	/* we found something bootable... */
    463 
    464 	if (num_root == 1) {
    465 		booted_device = &raidrootdev[rootID];
    466 	} else if (num_root > 1) {
    467 		/* we can't guess.. require the user to answer... */
    468 		boothowto |= RB_ASKNAME;
    469 	}
    470 }
    471 
    472 
    473 int
    474 raidsize(dev)
    475 	dev_t   dev;
    476 {
    477 	struct raid_softc *rs;
    478 	struct disklabel *lp;
    479 	int     part, unit, omask, size;
    480 
    481 	unit = raidunit(dev);
    482 	if (unit >= numraid)
    483 		return (-1);
    484 	rs = &raid_softc[unit];
    485 
    486 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    487 		return (-1);
    488 
    489 	part = DISKPART(dev);
    490 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    491 	lp = rs->sc_dkdev.dk_label;
    492 
    493 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    494 		return (-1);
    495 
    496 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    497 		size = -1;
    498 	else
    499 		size = lp->d_partitions[part].p_size *
    500 		    (lp->d_secsize / DEV_BSIZE);
    501 
    502 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    503 		return (-1);
    504 
    505 	return (size);
    506 
    507 }
    508 
    509 int
    510 raiddump(dev, blkno, va, size)
    511 	dev_t   dev;
    512 	daddr_t blkno;
    513 	caddr_t va;
    514 	size_t  size;
    515 {
    516 	/* Not implemented. */
    517 	return ENXIO;
    518 }
    519 /* ARGSUSED */
    520 int
    521 raidopen(dev, flags, fmt, p)
    522 	dev_t   dev;
    523 	int     flags, fmt;
    524 	struct proc *p;
    525 {
    526 	int     unit = raidunit(dev);
    527 	struct raid_softc *rs;
    528 	struct disklabel *lp;
    529 	int     part, pmask;
    530 	int     error = 0;
    531 
    532 	if (unit >= numraid)
    533 		return (ENXIO);
    534 	rs = &raid_softc[unit];
    535 
    536 	if ((error = raidlock(rs)) != 0)
    537 		return (error);
    538 	lp = rs->sc_dkdev.dk_label;
    539 
    540 	part = DISKPART(dev);
    541 	pmask = (1 << part);
    542 
    543 	db1_printf(("Opening raid device number: %d partition: %d\n",
    544 		unit, part));
    545 
    546 
    547 	if ((rs->sc_flags & RAIDF_INITED) &&
    548 	    (rs->sc_dkdev.dk_openmask == 0))
    549 		raidgetdisklabel(dev);
    550 
    551 	/* make sure that this partition exists */
    552 
    553 	if (part != RAW_PART) {
    554 		db1_printf(("Not a raw partition..\n"));
    555 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    556 		    ((part >= lp->d_npartitions) ||
    557 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    558 			error = ENXIO;
    559 			raidunlock(rs);
    560 			db1_printf(("Bailing out...\n"));
    561 			return (error);
    562 		}
    563 	}
    564 	/* Prevent this unit from being unconfigured while open. */
    565 	switch (fmt) {
    566 	case S_IFCHR:
    567 		rs->sc_dkdev.dk_copenmask |= pmask;
    568 		break;
    569 
    570 	case S_IFBLK:
    571 		rs->sc_dkdev.dk_bopenmask |= pmask;
    572 		break;
    573 	}
    574 
    575 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    576 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    577 		/* First one... mark things as dirty... Note that we *MUST*
    578 		 have done a configure before this.  I DO NOT WANT TO BE
    579 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    580 		 THAT THEY BELONG TOGETHER!!!!! */
    581 		/* XXX should check to see if we're only open for reading
    582 		   here... If so, we needn't do this, but then need some
    583 		   other way of keeping track of what's happened.. */
    584 
    585 		rf_markalldirty( raidPtrs[unit] );
    586 	}
    587 
    588 
    589 	rs->sc_dkdev.dk_openmask =
    590 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    591 
    592 	raidunlock(rs);
    593 
    594 	return (error);
    595 
    596 
    597 }
    598 /* ARGSUSED */
    599 int
    600 raidclose(dev, flags, fmt, p)
    601 	dev_t   dev;
    602 	int     flags, fmt;
    603 	struct proc *p;
    604 {
    605 	int     unit = raidunit(dev);
    606 	struct raid_softc *rs;
    607 	int     error = 0;
    608 	int     part;
    609 
    610 	if (unit >= numraid)
    611 		return (ENXIO);
    612 	rs = &raid_softc[unit];
    613 
    614 	if ((error = raidlock(rs)) != 0)
    615 		return (error);
    616 
    617 	part = DISKPART(dev);
    618 
    619 	/* ...that much closer to allowing unconfiguration... */
    620 	switch (fmt) {
    621 	case S_IFCHR:
    622 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    623 		break;
    624 
    625 	case S_IFBLK:
    626 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    627 		break;
    628 	}
    629 	rs->sc_dkdev.dk_openmask =
    630 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    631 
    632 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    633 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    634 		/* Last one... device is not unconfigured yet.
    635 		   Device shutdown has taken care of setting the
    636 		   clean bits if RAIDF_INITED is not set
    637 		   mark things as clean... */
    638 #if 0
    639 		printf("Last one on raid%d.  Updating status.\n",unit);
    640 #endif
    641 		rf_update_component_labels(raidPtrs[unit],
    642 						 RF_FINAL_COMPONENT_UPDATE);
    643 		if (doing_shutdown) {
    644 			/* last one, and we're going down, so
    645 			   lights out for this RAID set too. */
    646 			error = rf_Shutdown(raidPtrs[unit]);
    647 
    648 			/* It's no longer initialized... */
    649 			rs->sc_flags &= ~RAIDF_INITED;
    650 
    651 			/* Detach the disk. */
    652 			disk_detach(&rs->sc_dkdev);
    653 		}
    654 	}
    655 
    656 	raidunlock(rs);
    657 	return (0);
    658 
    659 }
    660 
    661 void
    662 raidstrategy(bp)
    663 	struct buf *bp;
    664 {
    665 	int s;
    666 
    667 	unsigned int raidID = raidunit(bp->b_dev);
    668 	RF_Raid_t *raidPtr;
    669 	struct raid_softc *rs = &raid_softc[raidID];
    670 	struct disklabel *lp;
    671 	int     wlabel;
    672 
    673 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    674 		bp->b_error = ENXIO;
    675 		bp->b_flags |= B_ERROR;
    676 		bp->b_resid = bp->b_bcount;
    677 		biodone(bp);
    678 		return;
    679 	}
    680 	if (raidID >= numraid || !raidPtrs[raidID]) {
    681 		bp->b_error = ENODEV;
    682 		bp->b_flags |= B_ERROR;
    683 		bp->b_resid = bp->b_bcount;
    684 		biodone(bp);
    685 		return;
    686 	}
    687 	raidPtr = raidPtrs[raidID];
    688 	if (!raidPtr->valid) {
    689 		bp->b_error = ENODEV;
    690 		bp->b_flags |= B_ERROR;
    691 		bp->b_resid = bp->b_bcount;
    692 		biodone(bp);
    693 		return;
    694 	}
    695 	if (bp->b_bcount == 0) {
    696 		db1_printf(("b_bcount is zero..\n"));
    697 		biodone(bp);
    698 		return;
    699 	}
    700 	lp = rs->sc_dkdev.dk_label;
    701 
    702 	/*
    703 	 * Do bounds checking and adjust transfer.  If there's an
    704 	 * error, the bounds check will flag that for us.
    705 	 */
    706 
    707 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    708 	if (DISKPART(bp->b_dev) != RAW_PART)
    709 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    710 			db1_printf(("Bounds check failed!!:%d %d\n",
    711 				(int) bp->b_blkno, (int) wlabel));
    712 			biodone(bp);
    713 			return;
    714 		}
    715 	s = splbio();
    716 
    717 	bp->b_resid = 0;
    718 
    719 	/* stuff it onto our queue */
    720 	BUFQ_PUT(&rs->buf_queue, bp);
    721 
    722 	raidstart(raidPtrs[raidID]);
    723 
    724 	splx(s);
    725 }
    726 /* ARGSUSED */
    727 int
    728 raidread(dev, uio, flags)
    729 	dev_t   dev;
    730 	struct uio *uio;
    731 	int     flags;
    732 {
    733 	int     unit = raidunit(dev);
    734 	struct raid_softc *rs;
    735 	int     part;
    736 
    737 	if (unit >= numraid)
    738 		return (ENXIO);
    739 	rs = &raid_softc[unit];
    740 
    741 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    742 		return (ENXIO);
    743 	part = DISKPART(dev);
    744 
    745 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    746 
    747 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    748 
    749 }
    750 /* ARGSUSED */
    751 int
    752 raidwrite(dev, uio, flags)
    753 	dev_t   dev;
    754 	struct uio *uio;
    755 	int     flags;
    756 {
    757 	int     unit = raidunit(dev);
    758 	struct raid_softc *rs;
    759 
    760 	if (unit >= numraid)
    761 		return (ENXIO);
    762 	rs = &raid_softc[unit];
    763 
    764 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    765 		return (ENXIO);
    766 	db1_printf(("raidwrite\n"));
    767 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    768 
    769 }
    770 
    771 int
    772 raidioctl(dev, cmd, data, flag, p)
    773 	dev_t   dev;
    774 	u_long  cmd;
    775 	caddr_t data;
    776 	int     flag;
    777 	struct proc *p;
    778 {
    779 	int     unit = raidunit(dev);
    780 	int     error = 0;
    781 	int     part, pmask;
    782 	struct raid_softc *rs;
    783 	RF_Config_t *k_cfg, *u_cfg;
    784 	RF_Raid_t *raidPtr;
    785 	RF_RaidDisk_t *diskPtr;
    786 	RF_AccTotals_t *totals;
    787 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    788 	u_char *specific_buf;
    789 	int retcode = 0;
    790 	int row;
    791 	int column;
    792 	int raidid;
    793 	struct rf_recon_req *rrcopy, *rr;
    794 	RF_ComponentLabel_t *clabel;
    795 	RF_ComponentLabel_t ci_label;
    796 	RF_ComponentLabel_t **clabel_ptr;
    797 	RF_SingleComponent_t *sparePtr,*componentPtr;
    798 	RF_SingleComponent_t hot_spare;
    799 	RF_SingleComponent_t component;
    800 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    801 	int i, j, d;
    802 #ifdef __HAVE_OLD_DISKLABEL
    803 	struct disklabel newlabel;
    804 #endif
    805 
    806 	if (unit >= numraid)
    807 		return (ENXIO);
    808 	rs = &raid_softc[unit];
    809 	raidPtr = raidPtrs[unit];
    810 
    811 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    812 		(int) DISKPART(dev), (int) unit, (int) cmd));
    813 
    814 	/* Must be open for writes for these commands... */
    815 	switch (cmd) {
    816 	case DIOCSDINFO:
    817 	case DIOCWDINFO:
    818 #ifdef __HAVE_OLD_DISKLABEL
    819 	case ODIOCWDINFO:
    820 	case ODIOCSDINFO:
    821 #endif
    822 	case DIOCWLABEL:
    823 		if ((flag & FWRITE) == 0)
    824 			return (EBADF);
    825 	}
    826 
    827 	/* Must be initialized for these... */
    828 	switch (cmd) {
    829 	case DIOCGDINFO:
    830 	case DIOCSDINFO:
    831 	case DIOCWDINFO:
    832 #ifdef __HAVE_OLD_DISKLABEL
    833 	case ODIOCGDINFO:
    834 	case ODIOCWDINFO:
    835 	case ODIOCSDINFO:
    836 	case ODIOCGDEFLABEL:
    837 #endif
    838 	case DIOCGPART:
    839 	case DIOCWLABEL:
    840 	case DIOCGDEFLABEL:
    841 	case RAIDFRAME_SHUTDOWN:
    842 	case RAIDFRAME_REWRITEPARITY:
    843 	case RAIDFRAME_GET_INFO:
    844 	case RAIDFRAME_RESET_ACCTOTALS:
    845 	case RAIDFRAME_GET_ACCTOTALS:
    846 	case RAIDFRAME_KEEP_ACCTOTALS:
    847 	case RAIDFRAME_GET_SIZE:
    848 	case RAIDFRAME_FAIL_DISK:
    849 	case RAIDFRAME_COPYBACK:
    850 	case RAIDFRAME_CHECK_RECON_STATUS:
    851 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    852 	case RAIDFRAME_GET_COMPONENT_LABEL:
    853 	case RAIDFRAME_SET_COMPONENT_LABEL:
    854 	case RAIDFRAME_ADD_HOT_SPARE:
    855 	case RAIDFRAME_REMOVE_HOT_SPARE:
    856 	case RAIDFRAME_INIT_LABELS:
    857 	case RAIDFRAME_REBUILD_IN_PLACE:
    858 	case RAIDFRAME_CHECK_PARITY:
    859 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    860 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    861 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    862 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    863 	case RAIDFRAME_SET_AUTOCONFIG:
    864 	case RAIDFRAME_SET_ROOT:
    865 	case RAIDFRAME_DELETE_COMPONENT:
    866 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    867 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    868 			return (ENXIO);
    869 	}
    870 
    871 	switch (cmd) {
    872 
    873 		/* configure the system */
    874 	case RAIDFRAME_CONFIGURE:
    875 
    876 		if (raidPtr->valid) {
    877 			/* There is a valid RAID set running on this unit! */
    878 			printf("raid%d: Device already configured!\n",unit);
    879 			return(EINVAL);
    880 		}
    881 
    882 		/* copy-in the configuration information */
    883 		/* data points to a pointer to the configuration structure */
    884 
    885 		u_cfg = *((RF_Config_t **) data);
    886 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    887 		if (k_cfg == NULL) {
    888 			return (ENOMEM);
    889 		}
    890 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    891 		    sizeof(RF_Config_t));
    892 		if (retcode) {
    893 			RF_Free(k_cfg, sizeof(RF_Config_t));
    894 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    895 				retcode));
    896 			return (retcode);
    897 		}
    898 		/* allocate a buffer for the layout-specific data, and copy it
    899 		 * in */
    900 		if (k_cfg->layoutSpecificSize) {
    901 			if (k_cfg->layoutSpecificSize > 10000) {
    902 				/* sanity check */
    903 				RF_Free(k_cfg, sizeof(RF_Config_t));
    904 				return (EINVAL);
    905 			}
    906 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    907 			    (u_char *));
    908 			if (specific_buf == NULL) {
    909 				RF_Free(k_cfg, sizeof(RF_Config_t));
    910 				return (ENOMEM);
    911 			}
    912 			retcode = copyin(k_cfg->layoutSpecific,
    913 			    (caddr_t) specific_buf,
    914 			    k_cfg->layoutSpecificSize);
    915 			if (retcode) {
    916 				RF_Free(k_cfg, sizeof(RF_Config_t));
    917 				RF_Free(specific_buf,
    918 					k_cfg->layoutSpecificSize);
    919 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    920 					retcode));
    921 				return (retcode);
    922 			}
    923 		} else
    924 			specific_buf = NULL;
    925 		k_cfg->layoutSpecific = specific_buf;
    926 
    927 		/* should do some kind of sanity check on the configuration.
    928 		 * Store the sum of all the bytes in the last byte? */
    929 
    930 		/* configure the system */
    931 
    932 		/*
    933 		 * Clear the entire RAID descriptor, just to make sure
    934 		 *  there is no stale data left in the case of a
    935 		 *  reconfiguration
    936 		 */
    937 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    938 		raidPtr->raidid = unit;
    939 
    940 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    941 
    942 		if (retcode == 0) {
    943 
    944 			/* allow this many simultaneous IO's to
    945 			   this RAID device */
    946 			raidPtr->openings = RAIDOUTSTANDING;
    947 
    948 			raidinit(raidPtr);
    949 			rf_markalldirty(raidPtr);
    950 		}
    951 		/* free the buffers.  No return code here. */
    952 		if (k_cfg->layoutSpecificSize) {
    953 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    954 		}
    955 		RF_Free(k_cfg, sizeof(RF_Config_t));
    956 
    957 		return (retcode);
    958 
    959 		/* shutdown the system */
    960 	case RAIDFRAME_SHUTDOWN:
    961 
    962 		if ((error = raidlock(rs)) != 0)
    963 			return (error);
    964 
    965 		/*
    966 		 * If somebody has a partition mounted, we shouldn't
    967 		 * shutdown.
    968 		 */
    969 
    970 		part = DISKPART(dev);
    971 		pmask = (1 << part);
    972 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    973 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    974 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    975 			raidunlock(rs);
    976 			return (EBUSY);
    977 		}
    978 
    979 		retcode = rf_Shutdown(raidPtr);
    980 
    981 		/* It's no longer initialized... */
    982 		rs->sc_flags &= ~RAIDF_INITED;
    983 
    984 		/* Detach the disk. */
    985 		disk_detach(&rs->sc_dkdev);
    986 
    987 		raidunlock(rs);
    988 
    989 		return (retcode);
    990 	case RAIDFRAME_GET_COMPONENT_LABEL:
    991 		clabel_ptr = (RF_ComponentLabel_t **) data;
    992 		/* need to read the component label for the disk indicated
    993 		   by row,column in clabel */
    994 
    995 		/* For practice, let's get it directly fromdisk, rather
    996 		   than from the in-core copy */
    997 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    998 			   (RF_ComponentLabel_t *));
    999 		if (clabel == NULL)
   1000 			return (ENOMEM);
   1001 
   1002 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1003 
   1004 		retcode = copyin( *clabel_ptr, clabel,
   1005 				  sizeof(RF_ComponentLabel_t));
   1006 
   1007 		if (retcode) {
   1008 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1009 			return(retcode);
   1010 		}
   1011 
   1012 		row = clabel->row;
   1013 		column = clabel->column;
   1014 
   1015 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1016 		    (column < 0) || (column >= raidPtr->numCol +
   1017 				     raidPtr->numSpare)) {
   1018 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1019 			return(EINVAL);
   1020 		}
   1021 
   1022 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1023 				raidPtr->raid_cinfo[row][column].ci_vp,
   1024 				clabel );
   1025 
   1026 		retcode = copyout((caddr_t) clabel,
   1027 				  (caddr_t) *clabel_ptr,
   1028 				  sizeof(RF_ComponentLabel_t));
   1029 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1030 		return (retcode);
   1031 
   1032 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1033 		clabel = (RF_ComponentLabel_t *) data;
   1034 
   1035 		/* XXX check the label for valid stuff... */
   1036 		/* Note that some things *should not* get modified --
   1037 		   the user should be re-initing the labels instead of
   1038 		   trying to patch things.
   1039 		   */
   1040 
   1041 		raidid = raidPtr->raidid;
   1042 		printf("raid%d: Got component label:\n", raidid);
   1043 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1044 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1045 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1046 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1047 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1048 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1049 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1050 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1051 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1052 
   1053 		row = clabel->row;
   1054 		column = clabel->column;
   1055 
   1056 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1057 		    (column < 0) || (column >= raidPtr->numCol)) {
   1058 			return(EINVAL);
   1059 		}
   1060 
   1061 		/* XXX this isn't allowed to do anything for now :-) */
   1062 
   1063 		/* XXX and before it is, we need to fill in the rest
   1064 		   of the fields!?!?!?! */
   1065 #if 0
   1066 		raidwrite_component_label(
   1067                             raidPtr->Disks[row][column].dev,
   1068 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1069 			    clabel );
   1070 #endif
   1071 		return (0);
   1072 
   1073 	case RAIDFRAME_INIT_LABELS:
   1074 		clabel = (RF_ComponentLabel_t *) data;
   1075 		/*
   1076 		   we only want the serial number from
   1077 		   the above.  We get all the rest of the information
   1078 		   from the config that was used to create this RAID
   1079 		   set.
   1080 		   */
   1081 
   1082 		raidPtr->serial_number = clabel->serial_number;
   1083 
   1084 		raid_init_component_label(raidPtr, &ci_label);
   1085 		ci_label.serial_number = clabel->serial_number;
   1086 
   1087 		for(row=0;row<raidPtr->numRow;row++) {
   1088 			ci_label.row = row;
   1089 			for(column=0;column<raidPtr->numCol;column++) {
   1090 				diskPtr = &raidPtr->Disks[row][column];
   1091 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1092 					ci_label.partitionSize = diskPtr->partitionSize;
   1093 					ci_label.column = column;
   1094 					raidwrite_component_label(
   1095 					  raidPtr->Disks[row][column].dev,
   1096 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1097 					  &ci_label );
   1098 				}
   1099 			}
   1100 		}
   1101 
   1102 		return (retcode);
   1103 	case RAIDFRAME_SET_AUTOCONFIG:
   1104 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1105 		printf("raid%d: New autoconfig value is: %d\n",
   1106 		       raidPtr->raidid, d);
   1107 		*(int *) data = d;
   1108 		return (retcode);
   1109 
   1110 	case RAIDFRAME_SET_ROOT:
   1111 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1112 		printf("raid%d: New rootpartition value is: %d\n",
   1113 		       raidPtr->raidid, d);
   1114 		*(int *) data = d;
   1115 		return (retcode);
   1116 
   1117 		/* initialize all parity */
   1118 	case RAIDFRAME_REWRITEPARITY:
   1119 
   1120 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1121 			/* Parity for RAID 0 is trivially correct */
   1122 			raidPtr->parity_good = RF_RAID_CLEAN;
   1123 			return(0);
   1124 		}
   1125 
   1126 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1127 			/* Re-write is already in progress! */
   1128 			return(EINVAL);
   1129 		}
   1130 
   1131 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1132 					   rf_RewriteParityThread,
   1133 					   raidPtr,"raid_parity");
   1134 		return (retcode);
   1135 
   1136 
   1137 	case RAIDFRAME_ADD_HOT_SPARE:
   1138 		sparePtr = (RF_SingleComponent_t *) data;
   1139 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1140 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1141 		return(retcode);
   1142 
   1143 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1144 		return(retcode);
   1145 
   1146 	case RAIDFRAME_DELETE_COMPONENT:
   1147 		componentPtr = (RF_SingleComponent_t *)data;
   1148 		memcpy( &component, componentPtr,
   1149 			sizeof(RF_SingleComponent_t));
   1150 		retcode = rf_delete_component(raidPtr, &component);
   1151 		return(retcode);
   1152 
   1153 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1154 		componentPtr = (RF_SingleComponent_t *)data;
   1155 		memcpy( &component, componentPtr,
   1156 			sizeof(RF_SingleComponent_t));
   1157 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1158 		return(retcode);
   1159 
   1160 	case RAIDFRAME_REBUILD_IN_PLACE:
   1161 
   1162 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1163 			/* Can't do this on a RAID 0!! */
   1164 			return(EINVAL);
   1165 		}
   1166 
   1167 		if (raidPtr->recon_in_progress == 1) {
   1168 			/* a reconstruct is already in progress! */
   1169 			return(EINVAL);
   1170 		}
   1171 
   1172 		componentPtr = (RF_SingleComponent_t *) data;
   1173 		memcpy( &component, componentPtr,
   1174 			sizeof(RF_SingleComponent_t));
   1175 		row = component.row;
   1176 		column = component.column;
   1177 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1178 		       row, column);
   1179 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1180 		    (column < 0) || (column >= raidPtr->numCol)) {
   1181 			return(EINVAL);
   1182 		}
   1183 
   1184 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1185 		if (rrcopy == NULL)
   1186 			return(ENOMEM);
   1187 
   1188 		rrcopy->raidPtr = (void *) raidPtr;
   1189 		rrcopy->row = row;
   1190 		rrcopy->col = column;
   1191 
   1192 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1193 					   rf_ReconstructInPlaceThread,
   1194 					   rrcopy,"raid_reconip");
   1195 		return(retcode);
   1196 
   1197 	case RAIDFRAME_GET_INFO:
   1198 		if (!raidPtr->valid)
   1199 			return (ENODEV);
   1200 		ucfgp = (RF_DeviceConfig_t **) data;
   1201 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1202 			  (RF_DeviceConfig_t *));
   1203 		if (d_cfg == NULL)
   1204 			return (ENOMEM);
   1205 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1206 		d_cfg->rows = raidPtr->numRow;
   1207 		d_cfg->cols = raidPtr->numCol;
   1208 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1209 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1210 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1211 			return (ENOMEM);
   1212 		}
   1213 		d_cfg->nspares = raidPtr->numSpare;
   1214 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1215 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1216 			return (ENOMEM);
   1217 		}
   1218 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1219 		d = 0;
   1220 		for (i = 0; i < d_cfg->rows; i++) {
   1221 			for (j = 0; j < d_cfg->cols; j++) {
   1222 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1223 				d++;
   1224 			}
   1225 		}
   1226 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1227 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1228 		}
   1229 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1230 				  sizeof(RF_DeviceConfig_t));
   1231 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1232 
   1233 		return (retcode);
   1234 
   1235 	case RAIDFRAME_CHECK_PARITY:
   1236 		*(int *) data = raidPtr->parity_good;
   1237 		return (0);
   1238 
   1239 	case RAIDFRAME_RESET_ACCTOTALS:
   1240 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1241 		return (0);
   1242 
   1243 	case RAIDFRAME_GET_ACCTOTALS:
   1244 		totals = (RF_AccTotals_t *) data;
   1245 		*totals = raidPtr->acc_totals;
   1246 		return (0);
   1247 
   1248 	case RAIDFRAME_KEEP_ACCTOTALS:
   1249 		raidPtr->keep_acc_totals = *(int *)data;
   1250 		return (0);
   1251 
   1252 	case RAIDFRAME_GET_SIZE:
   1253 		*(int *) data = raidPtr->totalSectors;
   1254 		return (0);
   1255 
   1256 		/* fail a disk & optionally start reconstruction */
   1257 	case RAIDFRAME_FAIL_DISK:
   1258 
   1259 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1260 			/* Can't do this on a RAID 0!! */
   1261 			return(EINVAL);
   1262 		}
   1263 
   1264 		rr = (struct rf_recon_req *) data;
   1265 
   1266 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1267 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1268 			return (EINVAL);
   1269 
   1270 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1271 		       unit, rr->row, rr->col);
   1272 
   1273 		/* make a copy of the recon request so that we don't rely on
   1274 		 * the user's buffer */
   1275 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1276 		if (rrcopy == NULL)
   1277 			return(ENOMEM);
   1278 		memcpy(rrcopy, rr, sizeof(*rr));
   1279 		rrcopy->raidPtr = (void *) raidPtr;
   1280 
   1281 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1282 					   rf_ReconThread,
   1283 					   rrcopy,"raid_recon");
   1284 		return (0);
   1285 
   1286 		/* invoke a copyback operation after recon on whatever disk
   1287 		 * needs it, if any */
   1288 	case RAIDFRAME_COPYBACK:
   1289 
   1290 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1291 			/* This makes no sense on a RAID 0!! */
   1292 			return(EINVAL);
   1293 		}
   1294 
   1295 		if (raidPtr->copyback_in_progress == 1) {
   1296 			/* Copyback is already in progress! */
   1297 			return(EINVAL);
   1298 		}
   1299 
   1300 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1301 					   rf_CopybackThread,
   1302 					   raidPtr,"raid_copyback");
   1303 		return (retcode);
   1304 
   1305 		/* return the percentage completion of reconstruction */
   1306 	case RAIDFRAME_CHECK_RECON_STATUS:
   1307 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1308 			/* This makes no sense on a RAID 0, so tell the
   1309 			   user it's done. */
   1310 			*(int *) data = 100;
   1311 			return(0);
   1312 		}
   1313 		row = 0; /* XXX we only consider a single row... */
   1314 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1315 			*(int *) data = 100;
   1316 		else
   1317 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1318 		return (0);
   1319 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1320 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1321 		row = 0; /* XXX we only consider a single row... */
   1322 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1323 			progressInfo.remaining = 0;
   1324 			progressInfo.completed = 100;
   1325 			progressInfo.total = 100;
   1326 		} else {
   1327 			progressInfo.total =
   1328 				raidPtr->reconControl[row]->numRUsTotal;
   1329 			progressInfo.completed =
   1330 				raidPtr->reconControl[row]->numRUsComplete;
   1331 			progressInfo.remaining = progressInfo.total -
   1332 				progressInfo.completed;
   1333 		}
   1334 		retcode = copyout((caddr_t) &progressInfo,
   1335 				  (caddr_t) *progressInfoPtr,
   1336 				  sizeof(RF_ProgressInfo_t));
   1337 		return (retcode);
   1338 
   1339 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1340 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1341 			/* This makes no sense on a RAID 0, so tell the
   1342 			   user it's done. */
   1343 			*(int *) data = 100;
   1344 			return(0);
   1345 		}
   1346 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1347 			*(int *) data = 100 *
   1348 				raidPtr->parity_rewrite_stripes_done /
   1349 				raidPtr->Layout.numStripe;
   1350 		} else {
   1351 			*(int *) data = 100;
   1352 		}
   1353 		return (0);
   1354 
   1355 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1356 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1357 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1358 			progressInfo.total = raidPtr->Layout.numStripe;
   1359 			progressInfo.completed =
   1360 				raidPtr->parity_rewrite_stripes_done;
   1361 			progressInfo.remaining = progressInfo.total -
   1362 				progressInfo.completed;
   1363 		} else {
   1364 			progressInfo.remaining = 0;
   1365 			progressInfo.completed = 100;
   1366 			progressInfo.total = 100;
   1367 		}
   1368 		retcode = copyout((caddr_t) &progressInfo,
   1369 				  (caddr_t) *progressInfoPtr,
   1370 				  sizeof(RF_ProgressInfo_t));
   1371 		return (retcode);
   1372 
   1373 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1374 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1375 			/* This makes no sense on a RAID 0 */
   1376 			*(int *) data = 100;
   1377 			return(0);
   1378 		}
   1379 		if (raidPtr->copyback_in_progress == 1) {
   1380 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1381 				raidPtr->Layout.numStripe;
   1382 		} else {
   1383 			*(int *) data = 100;
   1384 		}
   1385 		return (0);
   1386 
   1387 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1388 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1389 		if (raidPtr->copyback_in_progress == 1) {
   1390 			progressInfo.total = raidPtr->Layout.numStripe;
   1391 			progressInfo.completed =
   1392 				raidPtr->copyback_stripes_done;
   1393 			progressInfo.remaining = progressInfo.total -
   1394 				progressInfo.completed;
   1395 		} else {
   1396 			progressInfo.remaining = 0;
   1397 			progressInfo.completed = 100;
   1398 			progressInfo.total = 100;
   1399 		}
   1400 		retcode = copyout((caddr_t) &progressInfo,
   1401 				  (caddr_t) *progressInfoPtr,
   1402 				  sizeof(RF_ProgressInfo_t));
   1403 		return (retcode);
   1404 
   1405 		/* the sparetable daemon calls this to wait for the kernel to
   1406 		 * need a spare table. this ioctl does not return until a
   1407 		 * spare table is needed. XXX -- calling mpsleep here in the
   1408 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1409 		 * -- I should either compute the spare table in the kernel,
   1410 		 * or have a different -- XXX XXX -- interface (a different
   1411 		 * character device) for delivering the table     -- XXX */
   1412 #if 0
   1413 	case RAIDFRAME_SPARET_WAIT:
   1414 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1415 		while (!rf_sparet_wait_queue)
   1416 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1417 		waitreq = rf_sparet_wait_queue;
   1418 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1419 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1420 
   1421 		/* structure assignment */
   1422 		*((RF_SparetWait_t *) data) = *waitreq;
   1423 
   1424 		RF_Free(waitreq, sizeof(*waitreq));
   1425 		return (0);
   1426 
   1427 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1428 		 * code in it that will cause the dameon to exit */
   1429 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1430 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1431 		waitreq->fcol = -1;
   1432 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1433 		waitreq->next = rf_sparet_wait_queue;
   1434 		rf_sparet_wait_queue = waitreq;
   1435 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1436 		wakeup(&rf_sparet_wait_queue);
   1437 		return (0);
   1438 
   1439 		/* used by the spare table daemon to deliver a spare table
   1440 		 * into the kernel */
   1441 	case RAIDFRAME_SEND_SPARET:
   1442 
   1443 		/* install the spare table */
   1444 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1445 
   1446 		/* respond to the requestor.  the return status of the spare
   1447 		 * table installation is passed in the "fcol" field */
   1448 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1449 		waitreq->fcol = retcode;
   1450 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1451 		waitreq->next = rf_sparet_resp_queue;
   1452 		rf_sparet_resp_queue = waitreq;
   1453 		wakeup(&rf_sparet_resp_queue);
   1454 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1455 
   1456 		return (retcode);
   1457 #endif
   1458 
   1459 	default:
   1460 		break; /* fall through to the os-specific code below */
   1461 
   1462 	}
   1463 
   1464 	if (!raidPtr->valid)
   1465 		return (EINVAL);
   1466 
   1467 	/*
   1468 	 * Add support for "regular" device ioctls here.
   1469 	 */
   1470 
   1471 	switch (cmd) {
   1472 	case DIOCGDINFO:
   1473 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1474 		break;
   1475 #ifdef __HAVE_OLD_DISKLABEL
   1476 	case ODIOCGDINFO:
   1477 		newlabel = *(rs->sc_dkdev.dk_label);
   1478 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1479 			return ENOTTY;
   1480 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1481 		break;
   1482 #endif
   1483 
   1484 	case DIOCGPART:
   1485 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1486 		((struct partinfo *) data)->part =
   1487 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1488 		break;
   1489 
   1490 	case DIOCWDINFO:
   1491 	case DIOCSDINFO:
   1492 #ifdef __HAVE_OLD_DISKLABEL
   1493 	case ODIOCWDINFO:
   1494 	case ODIOCSDINFO:
   1495 #endif
   1496 	{
   1497 		struct disklabel *lp;
   1498 #ifdef __HAVE_OLD_DISKLABEL
   1499 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1500 			memset(&newlabel, 0, sizeof newlabel);
   1501 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1502 			lp = &newlabel;
   1503 		} else
   1504 #endif
   1505 		lp = (struct disklabel *)data;
   1506 
   1507 		if ((error = raidlock(rs)) != 0)
   1508 			return (error);
   1509 
   1510 		rs->sc_flags |= RAIDF_LABELLING;
   1511 
   1512 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1513 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1514 		if (error == 0) {
   1515 			if (cmd == DIOCWDINFO
   1516 #ifdef __HAVE_OLD_DISKLABEL
   1517 			    || cmd == ODIOCWDINFO
   1518 #endif
   1519 			   )
   1520 				error = writedisklabel(RAIDLABELDEV(dev),
   1521 				    raidstrategy, rs->sc_dkdev.dk_label,
   1522 				    rs->sc_dkdev.dk_cpulabel);
   1523 		}
   1524 		rs->sc_flags &= ~RAIDF_LABELLING;
   1525 
   1526 		raidunlock(rs);
   1527 
   1528 		if (error)
   1529 			return (error);
   1530 		break;
   1531 	}
   1532 
   1533 	case DIOCWLABEL:
   1534 		if (*(int *) data != 0)
   1535 			rs->sc_flags |= RAIDF_WLABEL;
   1536 		else
   1537 			rs->sc_flags &= ~RAIDF_WLABEL;
   1538 		break;
   1539 
   1540 	case DIOCGDEFLABEL:
   1541 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1542 		break;
   1543 
   1544 #ifdef __HAVE_OLD_DISKLABEL
   1545 	case ODIOCGDEFLABEL:
   1546 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1547 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1548 			return ENOTTY;
   1549 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1550 		break;
   1551 #endif
   1552 
   1553 	default:
   1554 		retcode = ENOTTY;
   1555 	}
   1556 	return (retcode);
   1557 
   1558 }
   1559 
   1560 
   1561 /* raidinit -- complete the rest of the initialization for the
   1562    RAIDframe device.  */
   1563 
   1564 
   1565 static void
   1566 raidinit(raidPtr)
   1567 	RF_Raid_t *raidPtr;
   1568 {
   1569 	struct raid_softc *rs;
   1570 	int     unit;
   1571 
   1572 	unit = raidPtr->raidid;
   1573 
   1574 	rs = &raid_softc[unit];
   1575 
   1576 	/* XXX should check return code first... */
   1577 	rs->sc_flags |= RAIDF_INITED;
   1578 
   1579 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1580 
   1581 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1582 
   1583 	/* disk_attach actually creates space for the CPU disklabel, among
   1584 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1585 	 * with disklabels. */
   1586 
   1587 	disk_attach(&rs->sc_dkdev);
   1588 
   1589 	/* XXX There may be a weird interaction here between this, and
   1590 	 * protectedSectors, as used in RAIDframe.  */
   1591 
   1592 	rs->sc_size = raidPtr->totalSectors;
   1593 
   1594 }
   1595 
   1596 /* wake up the daemon & tell it to get us a spare table
   1597  * XXX
   1598  * the entries in the queues should be tagged with the raidPtr
   1599  * so that in the extremely rare case that two recons happen at once,
   1600  * we know for which device were requesting a spare table
   1601  * XXX
   1602  *
   1603  * XXX This code is not currently used. GO
   1604  */
   1605 int
   1606 rf_GetSpareTableFromDaemon(req)
   1607 	RF_SparetWait_t *req;
   1608 {
   1609 	int     retcode;
   1610 
   1611 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1612 	req->next = rf_sparet_wait_queue;
   1613 	rf_sparet_wait_queue = req;
   1614 	wakeup(&rf_sparet_wait_queue);
   1615 
   1616 	/* mpsleep unlocks the mutex */
   1617 	while (!rf_sparet_resp_queue) {
   1618 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1619 		    "raidframe getsparetable", 0);
   1620 	}
   1621 	req = rf_sparet_resp_queue;
   1622 	rf_sparet_resp_queue = req->next;
   1623 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1624 
   1625 	retcode = req->fcol;
   1626 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1627 					 * alloc'd */
   1628 	return (retcode);
   1629 }
   1630 
    1631 	/* raidstart -- a wrapper around rf_DoAccess that pulls bufs off the unit's
    1632 	 * queue while raidPtr->openings > 0, extracts the appropriate info from
    1633 	 * each bp & passes it down.  Calls originating in the kernel must use
    1634 	 * non-blocking I/O.  Extra sanity checking returns "appropriate" errors
    1635 	 * (ENOSPC past totalSectors, EINVAL when b_bcount has sectorMask bits set)
    1636 	 * to make some standard utilities work.  Returns with raidPtr->mutex unlocked.
    1637 	 * Formerly known as: rf_DoAccessKernel
    1638 	 */
    1639 	void
    1640 	raidstart(raidPtr)
    1641 		RF_Raid_t *raidPtr;
    1642 	{
    1643 		RF_SectorCount_t num_blocks, pb, sum;
    1644 		RF_RaidAddr_t raid_addr;
    1645 		int     retcode;
    1646 		struct partition *pp;
    1647 		daddr_t blocknum;
    1648 		int     unit;
    1649 		struct raid_softc *rs;
    1650 		int     do_async;
    1651 		struct buf *bp;
    1652 
    1653 		unit = raidPtr->raidid;
    1654 		rs = &raid_softc[unit];
    1655 
    1656 		/* quick check to see if anything has died recently */
    1657 		RF_LOCK_MUTEX(raidPtr->mutex);
    1658 		if (raidPtr->numNewFailures > 0) {
    1659 			rf_update_component_labels(raidPtr,
    1660 						   RF_NORMAL_COMPONENT_UPDATE);
    1661 			raidPtr->numNewFailures--;
    1662 		}
    1663 
    1664 		/* Check to see if we're at the limit... */
    1665 		while (raidPtr->openings > 0) {
    1666 			RF_UNLOCK_MUTEX(raidPtr->mutex);
    1667 
    1668 			/* get the next item, if any, from the queue */
    1669 			if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
    1670 				/* nothing more to do */
    1671 				return;
    1672 			}
    1673 
    1674 			/* Ok, for the bp we have here, bp->b_blkno is relative to the
    1675 			 * partition.. Need to make it absolute to the underlying
    1676 			 * device.. */
    1677 
    1678 			blocknum = bp->b_blkno;
    1679 			if (DISKPART(bp->b_dev) != RAW_PART) {
    1680 				pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
    1681 				blocknum += pp->p_offset;
    1682 			}
    1683 
    1684 			db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
    1685 				    (int) blocknum));
    1686 
    1687 			db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
    1688 			db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
    1689 
    1690 			/* *THIS* is where we adjust what block we're going to...
    1691 			 * but DO NOT TOUCH bp->b_blkno!!! */
    1692 			raid_addr = blocknum;
    1693 			/* count is b_bcount >> logBytesPerSector; pb is 1 if sectorMask bits remain */
    1694 			num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
    1695 			pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
    1696 			sum = raid_addr + num_blocks + pb;
    1697 			if (1 || rf_debugKernelAccess) {	/* always true as written */
    1698 				db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
    1699 					    (int) raid_addr, (int) sum, (int) num_blocks,
    1700 					    (int) pb, (int) bp->b_resid));
    1701 			}
    1702 			if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
    1703 			    || (sum < num_blocks) || (sum < pb)) {	/* past end; presumably also catches wraparound */
    1704 				bp->b_error = ENOSPC;
    1705 				bp->b_flags |= B_ERROR;
    1706 				bp->b_resid = bp->b_bcount;
    1707 				biodone(bp);
    1708 				RF_LOCK_MUTEX(raidPtr->mutex);
    1709 				continue;
    1710 			}
    1711 			/*
    1712 			 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
    1713 			 */
    1714 
    1715 			if (bp->b_bcount & raidPtr->sectorMask) {
    1716 				bp->b_error = EINVAL;
    1717 				bp->b_flags |= B_ERROR;
    1718 				bp->b_resid = bp->b_bcount;
    1719 				biodone(bp);
    1720 				RF_LOCK_MUTEX(raidPtr->mutex);
    1721 				continue;
    1722 
    1723 			}
    1724 			db1_printf(("Calling DoAccess..\n"));
    1725 
    1726 			/* take one opening for this request, under the mutex */
    1727 			RF_LOCK_MUTEX(raidPtr->mutex);
    1728 			raidPtr->openings--;
    1729 			RF_UNLOCK_MUTEX(raidPtr->mutex);
    1730 
    1731 			/*
    1732 			 * Everything is async.
    1733 			 */
    1734 			do_async = 1;
    1735 
    1736 			disk_busy(&rs->sc_dkdev);
    1737 
    1738 			/* XXX we're still at splbio() here... do we *really*
    1739 			   need to be? */
    1740 
    1741 			/* don't ever condition on bp->b_flags & B_WRITE.
    1742 			 * always condition on B_READ instead */
    1743 
    1744 			retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
    1745 					      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
    1746 					      do_async, raid_addr, num_blocks,
    1747 					      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
    1748 			/* NOTE(review): retcode is assigned but not examined in this function */
    1749 			RF_LOCK_MUTEX(raidPtr->mutex);
    1750 		}
    1751 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1752 	}
   1753 
   1754 
   1755 
   1756 
   1757 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1758 
   1759 int
   1760 rf_DispatchKernelIO(queue, req)
   1761 	RF_DiskQueue_t *queue;
   1762 	RF_DiskQueueData_t *req;
   1763 {
   1764 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1765 	struct buf *bp;
   1766 	struct raidbuf *raidbp = NULL;
   1767 
   1768 	req->queue = queue;
   1769 
   1770 #if DIAGNOSTIC
   1771 	if (queue->raidPtr->raidid >= numraid) {
   1772 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1773 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1774 	}
   1775 #endif
   1776 
   1777 	bp = req->bp;
   1778 #if 1
   1779 	/* XXX when there is a physical disk failure, someone is passing us a
   1780 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1781 	 * without taking a performance hit... (not sure where the real bug
   1782 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1783 
   1784 	if (bp->b_flags & B_ERROR) {
   1785 		bp->b_flags &= ~B_ERROR;
   1786 	}
   1787 	if (bp->b_error != 0) {
   1788 		bp->b_error = 0;
   1789 	}
   1790 #endif
   1791 	raidbp = RAIDGETBUF(rs);
   1792 
   1793 	/*
   1794 	 * context for raidiodone
   1795 	 */
   1796 	raidbp->rf_obp = bp;
   1797 	raidbp->req = req;
   1798 
   1799 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1800 
   1801 	switch (req->type) {
   1802 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1803 		/* XXX need to do something extra here.. */
   1804 		/* I'm leaving this in, as I've never actually seen it used,
   1805 		 * and I'd like folks to report it... GO */
   1806 		printf(("WAKEUP CALLED\n"));
   1807 		queue->numOutstanding++;
   1808 
   1809 		/* XXX need to glue the original buffer into this??  */
   1810 
   1811 		KernelWakeupFunc(&raidbp->rf_buf);
   1812 		break;
   1813 
   1814 	case RF_IO_TYPE_READ:
   1815 	case RF_IO_TYPE_WRITE:
   1816 
   1817 		if (req->tracerec) {
   1818 			RF_ETIMER_START(req->tracerec->timer);
   1819 		}
   1820 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1821 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1822 		    req->sectorOffset, req->numSector,
   1823 		    req->buf, KernelWakeupFunc, (void *) req,
   1824 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1825 
   1826 		if (rf_debugKernelAccess) {
   1827 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1828 				(long) bp->b_blkno));
   1829 		}
   1830 		queue->numOutstanding++;
   1831 		queue->last_deq_sector = req->sectorOffset;
   1832 		/* acc wouldn't have been let in if there were any pending
   1833 		 * reqs at any other priority */
   1834 		queue->curPriority = req->priority;
   1835 
   1836 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1837 			    req->type, queue->raidPtr->raidid,
   1838 			    queue->row, queue->col));
   1839 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1840 			(int) req->sectorOffset, (int) req->numSector,
   1841 			(int) (req->numSector <<
   1842 			    queue->raidPtr->logBytesPerSector),
   1843 			(int) queue->raidPtr->logBytesPerSector));
   1844 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1845 			raidbp->rf_buf.b_vp->v_numoutput++;
   1846 		}
   1847 		VOP_STRATEGY(&raidbp->rf_buf);
   1848 
   1849 		break;
   1850 
   1851 	default:
   1852 		panic("bad req->type in rf_DispatchKernelIO");
   1853 	}
   1854 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1855 
   1856 	return (0);
   1857 }
   1858 /* this is the callback function associated with a I/O invoked from
   1859    kernel code.
   1860  */
   1861 static void
   1862 KernelWakeupFunc(vbp)
   1863 	struct buf *vbp;
   1864 {
   1865 	RF_DiskQueueData_t *req = NULL;
   1866 	RF_DiskQueue_t *queue;
   1867 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1868 	struct buf *bp;
   1869 	struct raid_softc *rs;
   1870 	int     unit;
   1871 	int s;
   1872 
   1873 	s = splbio();
   1874 	db1_printf(("recovering the request queue:\n"));
   1875 	req = raidbp->req;
   1876 
   1877 	bp = raidbp->rf_obp;
   1878 
   1879 	queue = (RF_DiskQueue_t *) req->queue;
   1880 
   1881 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1882 		bp->b_flags |= B_ERROR;
   1883 		bp->b_error = raidbp->rf_buf.b_error ?
   1884 		    raidbp->rf_buf.b_error : EIO;
   1885 	}
   1886 
   1887 	/* XXX methinks this could be wrong... */
   1888 #if 1
   1889 	bp->b_resid = raidbp->rf_buf.b_resid;
   1890 #endif
   1891 
   1892 	if (req->tracerec) {
   1893 		RF_ETIMER_STOP(req->tracerec->timer);
   1894 		RF_ETIMER_EVAL(req->tracerec->timer);
   1895 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1896 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1897 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1898 		req->tracerec->num_phys_ios++;
   1899 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1900 	}
   1901 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1902 
   1903 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1904 
   1905 
   1906 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1907 	 * ballistic, and mark the component as hosed... */
   1908 
   1909 	if (bp->b_flags & B_ERROR) {
   1910 		/* Mark the disk as dead */
   1911 		/* but only mark it once... */
   1912 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1913 		    rf_ds_optimal) {
   1914 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1915 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1916 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1917 			    rf_ds_failed;
   1918 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1919 			queue->raidPtr->numFailures++;
   1920 			queue->raidPtr->numNewFailures++;
   1921 		} else {	/* Disk is already dead... */
   1922 			/* printf("Disk already marked as dead!\n"); */
   1923 		}
   1924 
   1925 	}
   1926 
   1927 	rs = &raid_softc[unit];
   1928 	RAIDPUTBUF(rs, raidbp);
   1929 
   1930 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1931 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1932 
   1933 	splx(s);
   1934 }
   1935 
   1936 
   1937 
   1938 /*
   1939  * initialize a buf structure for doing an I/O in the kernel.
   1940  */
   1941 static void
   1942 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1943        logBytesPerSector, b_proc)
   1944 	struct buf *bp;
   1945 	struct vnode *b_vp;
   1946 	unsigned rw_flag;
   1947 	dev_t dev;
   1948 	RF_SectorNum_t startSect;
   1949 	RF_SectorCount_t numSect;
   1950 	caddr_t buf;
   1951 	void (*cbFunc) (struct buf *);
   1952 	void *cbArg;
   1953 	int logBytesPerSector;
   1954 	struct proc *b_proc;
   1955 {
   1956 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1957 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1958 	bp->b_bcount = numSect << logBytesPerSector;
   1959 	bp->b_bufsize = bp->b_bcount;
   1960 	bp->b_error = 0;
   1961 	bp->b_dev = dev;
   1962 	bp->b_data = buf;
   1963 	bp->b_blkno = startSect;
   1964 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1965 	if (bp->b_bcount == 0) {
   1966 		panic("bp->b_bcount is zero in InitBP!!\n");
   1967 	}
   1968 	bp->b_proc = b_proc;
   1969 	bp->b_iodone = cbFunc;
   1970 	bp->b_vp = b_vp;
   1971 
   1972 }
   1973 
   1974 static void
   1975 raidgetdefaultlabel(raidPtr, rs, lp)
   1976 	RF_Raid_t *raidPtr;
   1977 	struct raid_softc *rs;
   1978 	struct disklabel *lp;
   1979 {
   1980 	db1_printf(("Building a default label...\n"));
   1981 	memset(lp, 0, sizeof(*lp));
   1982 
   1983 	/* fabricate a label... */
   1984 	lp->d_secperunit = raidPtr->totalSectors;
   1985 	lp->d_secsize = raidPtr->bytesPerSector;
   1986 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1987 	lp->d_ntracks = 4 * raidPtr->numCol;
   1988 	lp->d_ncylinders = raidPtr->totalSectors /
   1989 		(lp->d_nsectors * lp->d_ntracks);
   1990 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1991 
   1992 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1993 	lp->d_type = DTYPE_RAID;
   1994 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1995 	lp->d_rpm = 3600;
   1996 	lp->d_interleave = 1;
   1997 	lp->d_flags = 0;
   1998 
   1999 	lp->d_partitions[RAW_PART].p_offset = 0;
   2000 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2001 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2002 	lp->d_npartitions = RAW_PART + 1;
   2003 
   2004 	lp->d_magic = DISKMAGIC;
   2005 	lp->d_magic2 = DISKMAGIC;
   2006 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2007 
   2008 }
   2009 /*
   2010  * Read the disklabel from the raid device.  If one is not present, fake one
   2011  * up.
   2012  */
   2013 static void
   2014 raidgetdisklabel(dev)
   2015 	dev_t   dev;
   2016 {
   2017 	int     unit = raidunit(dev);
   2018 	struct raid_softc *rs = &raid_softc[unit];
   2019 	char   *errstring;
   2020 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2021 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2022 	RF_Raid_t *raidPtr;
   2023 
   2024 	db1_printf(("Getting the disklabel...\n"));
   2025 
   2026 	memset(clp, 0, sizeof(*clp));
   2027 
   2028 	raidPtr = raidPtrs[unit];
   2029 
   2030 	raidgetdefaultlabel(raidPtr, rs, lp);
   2031 
   2032 	/*
   2033 	 * Call the generic disklabel extraction routine.
   2034 	 */
   2035 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2036 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2037 	if (errstring)
   2038 		raidmakedisklabel(rs);
   2039 	else {
   2040 		int     i;
   2041 		struct partition *pp;
   2042 
   2043 		/*
   2044 		 * Sanity check whether the found disklabel is valid.
   2045 		 *
   2046 		 * This is necessary since total size of the raid device
   2047 		 * may vary when an interleave is changed even though exactly
   2048 		 * same componets are used, and old disklabel may used
   2049 		 * if that is found.
   2050 		 */
   2051 		if (lp->d_secperunit != rs->sc_size)
   2052 			printf("raid%d: WARNING: %s: "
   2053 			    "total sector size in disklabel (%d) != "
   2054 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2055 			    lp->d_secperunit, (long) rs->sc_size);
   2056 		for (i = 0; i < lp->d_npartitions; i++) {
   2057 			pp = &lp->d_partitions[i];
   2058 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2059 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2060 				       "exceeds the size of raid (%ld)\n",
   2061 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2062 		}
   2063 	}
   2064 
   2065 }
   2066 /*
   2067  * Take care of things one might want to take care of in the event
   2068  * that a disklabel isn't present.
   2069  */
   2070 static void
   2071 raidmakedisklabel(rs)
   2072 	struct raid_softc *rs;
   2073 {
   2074 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2075 	db1_printf(("Making a label..\n"));
   2076 
   2077 	/*
   2078 	 * For historical reasons, if there's no disklabel present
   2079 	 * the raw partition must be marked FS_BSDFFS.
   2080 	 */
   2081 
   2082 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2083 
   2084 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2085 
   2086 	lp->d_checksum = dkcksum(lp);
   2087 }
   2088 /*
   2089  * Lookup the provided name in the filesystem.  If the file exists,
   2090  * is a valid block device, and isn't being used by anyone else,
   2091  * set *vpp to the file's vnode.
   2092  * You'll find the original of this in ccd.c
   2093  */
   2094 int
   2095 raidlookup(path, p, vpp)
   2096 	char   *path;
   2097 	struct proc *p;
   2098 	struct vnode **vpp;	/* result */
   2099 {
   2100 	struct nameidata nd;
   2101 	struct vnode *vp;
   2102 	struct vattr va;
   2103 	int     error;
   2104 
   2105 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2106 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2107 #if 0
   2108 		printf("RAIDframe: vn_open returned %d\n", error);
   2109 #endif
   2110 		return (error);
   2111 	}
   2112 	vp = nd.ni_vp;
   2113 	if (vp->v_usecount > 1) {
   2114 		VOP_UNLOCK(vp, 0);
   2115 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2116 		return (EBUSY);
   2117 	}
   2118 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2119 		VOP_UNLOCK(vp, 0);
   2120 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2121 		return (error);
   2122 	}
   2123 	/* XXX: eventually we should handle VREG, too. */
   2124 	if (va.va_type != VBLK) {
   2125 		VOP_UNLOCK(vp, 0);
   2126 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2127 		return (ENOTBLK);
   2128 	}
   2129 	VOP_UNLOCK(vp, 0);
   2130 	*vpp = vp;
   2131 	return (0);
   2132 }
   2133 /*
   2134  * Wait interruptibly for an exclusive lock.
   2135  *
   2136  * XXX
   2137  * Several drivers do this; it should be abstracted and made MP-safe.
   2138  * (Hmm... where have we seen this warning before :->  GO )
   2139  */
   2140 static int
   2141 raidlock(rs)
   2142 	struct raid_softc *rs;
   2143 {
   2144 	int     error;
   2145 
   2146 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2147 		rs->sc_flags |= RAIDF_WANTED;
   2148 		if ((error =
   2149 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2150 			return (error);
   2151 	}
   2152 	rs->sc_flags |= RAIDF_LOCKED;
   2153 	return (0);
   2154 }
   2155 /*
   2156  * Unlock and wake up any waiters.
   2157  */
   2158 static void
   2159 raidunlock(rs)
   2160 	struct raid_softc *rs;
   2161 {
   2162 
   2163 	rs->sc_flags &= ~RAIDF_LOCKED;
   2164 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2165 		rs->sc_flags &= ~RAIDF_WANTED;
   2166 		wakeup(rs);
   2167 	}
   2168 }
   2169 
   2170 
/*
 * Placement of the component label on a component.  The label routines
 * in this file divide the offset by DEV_BSIZE to obtain the block number
 * and use the size both as the buffer size and as the transfer length.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2173 
   2174 int
   2175 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2176 {
   2177 	RF_ComponentLabel_t clabel;
   2178 	raidread_component_label(dev, b_vp, &clabel);
   2179 	clabel.mod_counter = mod_counter;
   2180 	clabel.clean = RF_RAID_CLEAN;
   2181 	raidwrite_component_label(dev, b_vp, &clabel);
   2182 	return(0);
   2183 }
   2184 
   2185 
   2186 int
   2187 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2188 {
   2189 	RF_ComponentLabel_t clabel;
   2190 	raidread_component_label(dev, b_vp, &clabel);
   2191 	clabel.mod_counter = mod_counter;
   2192 	clabel.clean = RF_RAID_DIRTY;
   2193 	raidwrite_component_label(dev, b_vp, &clabel);
   2194 	return(0);
   2195 }
   2196 
   2197 /* ARGSUSED */
   2198 int
   2199 raidread_component_label(dev, b_vp, clabel)
   2200 	dev_t dev;
   2201 	struct vnode *b_vp;
   2202 	RF_ComponentLabel_t *clabel;
   2203 {
   2204 	struct buf *bp;
   2205 	const struct bdevsw *bdev;
   2206 	int error;
   2207 
   2208 	/* XXX should probably ensure that we don't try to do this if
   2209 	   someone has changed rf_protected_sectors. */
   2210 
   2211 	if (b_vp == NULL) {
   2212 		/* For whatever reason, this component is not valid.
   2213 		   Don't try to read a component label from it. */
   2214 		return(EINVAL);
   2215 	}
   2216 
   2217 	/* get a block of the appropriate size... */
   2218 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2219 	bp->b_dev = dev;
   2220 
   2221 	/* get our ducks in a row for the read */
   2222 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2223 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2224 	bp->b_flags |= B_READ;
   2225  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2226 
   2227 	bdev = bdevsw_lookup(bp->b_dev);
   2228 	if (bdev == NULL)
   2229 		return (ENXIO);
   2230 	(*bdev->d_strategy)(bp);
   2231 
   2232 	error = biowait(bp);
   2233 
   2234 	if (!error) {
   2235 		memcpy(clabel, bp->b_data,
   2236 		       sizeof(RF_ComponentLabel_t));
   2237 #if 0
   2238 		rf_print_component_label( clabel );
   2239 #endif
   2240         } else {
   2241 #if 0
   2242 		printf("Failed to read RAID component label!\n");
   2243 #endif
   2244 	}
   2245 
   2246 	brelse(bp);
   2247 	return(error);
   2248 }
   2249 /* ARGSUSED */
   2250 int
   2251 raidwrite_component_label(dev, b_vp, clabel)
   2252 	dev_t dev;
   2253 	struct vnode *b_vp;
   2254 	RF_ComponentLabel_t *clabel;
   2255 {
   2256 	struct buf *bp;
   2257 	const struct bdevsw *bdev;
   2258 	int error;
   2259 
   2260 	/* get a block of the appropriate size... */
   2261 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2262 	bp->b_dev = dev;
   2263 
   2264 	/* get our ducks in a row for the write */
   2265 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2266 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2267 	bp->b_flags |= B_WRITE;
   2268  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2269 
   2270 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2271 
   2272 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2273 
   2274 	bdev = bdevsw_lookup(bp->b_dev);
   2275 	if (bdev == NULL)
   2276 		return (ENXIO);
   2277 	(*bdev->d_strategy)(bp);
   2278 	error = biowait(bp);
   2279 	brelse(bp);
   2280 	if (error) {
   2281 #if 1
   2282 		printf("Failed to write RAID component info!\n");
   2283 #endif
   2284 	}
   2285 
   2286 	return(error);
   2287 }
   2288 
   2289 void
   2290 rf_markalldirty(raidPtr)
   2291 	RF_Raid_t *raidPtr;
   2292 {
   2293 	RF_ComponentLabel_t clabel;
   2294 	int r,c;
   2295 
   2296 	raidPtr->mod_counter++;
   2297 	for (r = 0; r < raidPtr->numRow; r++) {
   2298 		for (c = 0; c < raidPtr->numCol; c++) {
   2299 			/* we don't want to touch (at all) a disk that has
   2300 			   failed */
   2301 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2302 				raidread_component_label(
   2303 					raidPtr->Disks[r][c].dev,
   2304 					raidPtr->raid_cinfo[r][c].ci_vp,
   2305 					&clabel);
   2306 				if (clabel.status == rf_ds_spared) {
   2307 					/* XXX do something special...
   2308 					 but whatever you do, don't
   2309 					 try to access it!! */
   2310 				} else {
   2311 #if 0
   2312 				clabel.status =
   2313 					raidPtr->Disks[r][c].status;
   2314 				raidwrite_component_label(
   2315 					raidPtr->Disks[r][c].dev,
   2316 					raidPtr->raid_cinfo[r][c].ci_vp,
   2317 					&clabel);
   2318 #endif
   2319 				raidmarkdirty(
   2320 				       raidPtr->Disks[r][c].dev,
   2321 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2322 				       raidPtr->mod_counter);
   2323 				}
   2324 			}
   2325 		}
   2326 	}
   2327 	/* printf("Component labels marked dirty.\n"); */
   2328 #if 0
   2329 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2330 		sparecol = raidPtr->numCol + c;
   2331 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2332 			/*
   2333 
   2334 			   XXX this is where we get fancy and map this spare
   2335 			   into it's correct spot in the array.
   2336 
   2337 			 */
   2338 			/*
   2339 
   2340 			   we claim this disk is "optimal" if it's
   2341 			   rf_ds_used_spare, as that means it should be
   2342 			   directly substitutable for the disk it replaced.
   2343 			   We note that too...
   2344 
   2345 			 */
   2346 
   2347 			for(i=0;i<raidPtr->numRow;i++) {
   2348 				for(j=0;j<raidPtr->numCol;j++) {
   2349 					if ((raidPtr->Disks[i][j].spareRow ==
   2350 					     r) &&
   2351 					    (raidPtr->Disks[i][j].spareCol ==
   2352 					     sparecol)) {
   2353 						srow = r;
   2354 						scol = sparecol;
   2355 						break;
   2356 					}
   2357 				}
   2358 			}
   2359 
   2360 			raidread_component_label(
   2361 				      raidPtr->Disks[r][sparecol].dev,
   2362 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2363 				      &clabel);
   2364 			/* make sure status is noted */
   2365 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2366 			clabel.mod_counter = raidPtr->mod_counter;
   2367 			clabel.serial_number = raidPtr->serial_number;
   2368 			clabel.row = srow;
   2369 			clabel.column = scol;
   2370 			clabel.num_rows = raidPtr->numRow;
   2371 			clabel.num_columns = raidPtr->numCol;
   2372 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2373 			clabel.status = rf_ds_optimal;
   2374 			raidwrite_component_label(
   2375 				      raidPtr->Disks[r][sparecol].dev,
   2376 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2377 				      &clabel);
   2378 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2379 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2380 		}
   2381 	}
   2382 
   2383 #endif
   2384 }
   2385 
   2386 
   2387 void
   2388 rf_update_component_labels(raidPtr, final)
   2389 	RF_Raid_t *raidPtr;
   2390 	int final;
   2391 {
   2392 	RF_ComponentLabel_t clabel;
   2393 	int sparecol;
   2394 	int r,c;
   2395 	int i,j;
   2396 	int srow, scol;
   2397 
   2398 	srow = -1;
   2399 	scol = -1;
   2400 
   2401 	/* XXX should do extra checks to make sure things really are clean,
   2402 	   rather than blindly setting the clean bit... */
   2403 
   2404 	raidPtr->mod_counter++;
   2405 
   2406 	for (r = 0; r < raidPtr->numRow; r++) {
   2407 		for (c = 0; c < raidPtr->numCol; c++) {
   2408 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2409 				raidread_component_label(
   2410 					raidPtr->Disks[r][c].dev,
   2411 					raidPtr->raid_cinfo[r][c].ci_vp,
   2412 					&clabel);
   2413 				/* make sure status is noted */
   2414 				clabel.status = rf_ds_optimal;
   2415 				/* bump the counter */
   2416 				clabel.mod_counter = raidPtr->mod_counter;
   2417 
   2418 				raidwrite_component_label(
   2419 					raidPtr->Disks[r][c].dev,
   2420 					raidPtr->raid_cinfo[r][c].ci_vp,
   2421 					&clabel);
   2422 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2423 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2424 						raidmarkclean(
   2425 							      raidPtr->Disks[r][c].dev,
   2426 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2427 							      raidPtr->mod_counter);
   2428 					}
   2429 				}
   2430 			}
   2431 			/* else we don't touch it.. */
   2432 		}
   2433 	}
   2434 
   2435 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2436 		sparecol = raidPtr->numCol + c;
   2437 		/* Need to ensure that the reconstruct actually completed! */
   2438 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2439 			/*
   2440 
   2441 			   we claim this disk is "optimal" if it's
   2442 			   rf_ds_used_spare, as that means it should be
   2443 			   directly substitutable for the disk it replaced.
   2444 			   We note that too...
   2445 
   2446 			 */
   2447 
   2448 			for(i=0;i<raidPtr->numRow;i++) {
   2449 				for(j=0;j<raidPtr->numCol;j++) {
   2450 					if ((raidPtr->Disks[i][j].spareRow ==
   2451 					     0) &&
   2452 					    (raidPtr->Disks[i][j].spareCol ==
   2453 					     sparecol)) {
   2454 						srow = i;
   2455 						scol = j;
   2456 						break;
   2457 					}
   2458 				}
   2459 			}
   2460 
   2461 			/* XXX shouldn't *really* need this... */
   2462 			raidread_component_label(
   2463 				      raidPtr->Disks[0][sparecol].dev,
   2464 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2465 				      &clabel);
   2466 			/* make sure status is noted */
   2467 
   2468 			raid_init_component_label(raidPtr, &clabel);
   2469 
   2470 			clabel.mod_counter = raidPtr->mod_counter;
   2471 			clabel.row = srow;
   2472 			clabel.column = scol;
   2473 			clabel.status = rf_ds_optimal;
   2474 
   2475 			raidwrite_component_label(
   2476 				      raidPtr->Disks[0][sparecol].dev,
   2477 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2478 				      &clabel);
   2479 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2480 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2481 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2482 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2483 						       raidPtr->mod_counter);
   2484 				}
   2485 			}
   2486 		}
   2487 	}
   2488 	/* 	printf("Component labels updated\n"); */
   2489 }
   2490 
   2491 void
   2492 rf_close_component(raidPtr, vp, auto_configured)
   2493 	RF_Raid_t *raidPtr;
   2494 	struct vnode *vp;
   2495 	int auto_configured;
   2496 {
   2497 	struct proc *p;
   2498 
   2499 	p = raidPtr->engine_thread;
   2500 
   2501 	if (vp != NULL) {
   2502 		if (auto_configured == 1) {
   2503 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2504 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2505 			vput(vp);
   2506 
   2507 		} else {
   2508 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2509 		}
   2510 	} else {
   2511 #if 0
   2512 		printf("vnode was NULL\n");
   2513 #endif
   2514 	}
   2515 }
   2516 
   2517 
   2518 void
   2519 rf_UnconfigureVnodes(raidPtr)
   2520 	RF_Raid_t *raidPtr;
   2521 {
   2522 	int r,c;
   2523 	struct proc *p;
   2524 	struct vnode *vp;
   2525 	int acd;
   2526 
   2527 
   2528 	/* We take this opportunity to close the vnodes like we should.. */
   2529 
   2530 	p = raidPtr->engine_thread;
   2531 
   2532 	for (r = 0; r < raidPtr->numRow; r++) {
   2533 		for (c = 0; c < raidPtr->numCol; c++) {
   2534 #if 0
   2535 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2536 			       raidPtr->raidid, r, c);
   2537 #endif
   2538 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2539 			acd = raidPtr->Disks[r][c].auto_configured;
   2540 			rf_close_component(raidPtr, vp, acd);
   2541 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2542 			raidPtr->Disks[r][c].auto_configured = 0;
   2543 		}
   2544 	}
   2545 	for (r = 0; r < raidPtr->numSpare; r++) {
   2546 #if 0
   2547 		printf("raid%d: Closing vnode for spare: %d\n",
   2548 		       raidPtr->raidid, r);
   2549 #endif
   2550 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2551 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2552 		rf_close_component(raidPtr, vp, acd);
   2553 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2554 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2555 	}
   2556 }
   2557 
   2558 
   2559 void
   2560 rf_ReconThread(req)
   2561 	struct rf_recon_req *req;
   2562 {
   2563 	int     s;
   2564 	RF_Raid_t *raidPtr;
   2565 
   2566 	s = splbio();
   2567 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2568 	raidPtr->recon_in_progress = 1;
   2569 
   2570 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2571 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2572 
   2573 	/* XXX get rid of this! we don't need it at all.. */
   2574 	RF_Free(req, sizeof(*req));
   2575 
   2576 	raidPtr->recon_in_progress = 0;
   2577 	splx(s);
   2578 
   2579 	/* That's all... */
   2580 	kthread_exit(0);        /* does not return */
   2581 }
   2582 
   2583 void
   2584 rf_RewriteParityThread(raidPtr)
   2585 	RF_Raid_t *raidPtr;
   2586 {
   2587 	int retcode;
   2588 	int s;
   2589 
   2590 	raidPtr->parity_rewrite_in_progress = 1;
   2591 	s = splbio();
   2592 	retcode = rf_RewriteParity(raidPtr);
   2593 	splx(s);
   2594 	if (retcode) {
   2595 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2596 	} else {
   2597 		/* set the clean bit!  If we shutdown correctly,
   2598 		   the clean bit on each component label will get
   2599 		   set */
   2600 		raidPtr->parity_good = RF_RAID_CLEAN;
   2601 	}
   2602 	raidPtr->parity_rewrite_in_progress = 0;
   2603 
   2604 	/* Anyone waiting for us to stop?  If so, inform them... */
   2605 	if (raidPtr->waitShutdown) {
   2606 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2607 	}
   2608 
   2609 	/* That's all... */
   2610 	kthread_exit(0);        /* does not return */
   2611 }
   2612 
   2613 
   2614 void
   2615 rf_CopybackThread(raidPtr)
   2616 	RF_Raid_t *raidPtr;
   2617 {
   2618 	int s;
   2619 
   2620 	raidPtr->copyback_in_progress = 1;
   2621 	s = splbio();
   2622 	rf_CopybackReconstructedData(raidPtr);
   2623 	splx(s);
   2624 	raidPtr->copyback_in_progress = 0;
   2625 
   2626 	/* That's all... */
   2627 	kthread_exit(0);        /* does not return */
   2628 }
   2629 
   2630 
   2631 void
   2632 rf_ReconstructInPlaceThread(req)
   2633 	struct rf_recon_req *req;
   2634 {
   2635 	int retcode;
   2636 	int s;
   2637 	RF_Raid_t *raidPtr;
   2638 
   2639 	s = splbio();
   2640 	raidPtr = req->raidPtr;
   2641 	raidPtr->recon_in_progress = 1;
   2642 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2643 	RF_Free(req, sizeof(*req));
   2644 	raidPtr->recon_in_progress = 0;
   2645 	splx(s);
   2646 
   2647 	/* That's all... */
   2648 	kthread_exit(0);        /* does not return */
   2649 }
   2650 
   2651 RF_AutoConfig_t *
   2652 rf_find_raid_components()
   2653 {
   2654 	struct vnode *vp;
   2655 	struct disklabel label;
   2656 	struct device *dv;
   2657 	dev_t dev;
   2658 	int bmajor;
   2659 	int error;
   2660 	int i;
   2661 	int good_one;
   2662 	RF_ComponentLabel_t *clabel;
   2663 	RF_AutoConfig_t *ac_list;
   2664 	RF_AutoConfig_t *ac;
   2665 
   2666 
   2667 	/* initialize the AutoConfig list */
   2668 	ac_list = NULL;
   2669 
   2670 	/* we begin by trolling through *all* the devices on the system */
   2671 
   2672 	for (dv = alldevs.tqh_first; dv != NULL;
   2673 	     dv = dv->dv_list.tqe_next) {
   2674 
   2675 		/* we are only interested in disks... */
   2676 		if (dv->dv_class != DV_DISK)
   2677 			continue;
   2678 
   2679 		/* we don't care about floppies... */
   2680 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2681 			continue;
   2682 		}
   2683 
   2684 		/* we don't care about CD's... */
   2685 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
   2686 			continue;
   2687 		}
   2688 
   2689 		/* hdfd is the Atari/Hades floppy driver */
   2690 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2691 			continue;
   2692 		}
   2693 		/* fdisa is the Atari/Milan floppy driver */
   2694 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
   2695 			continue;
   2696 		}
   2697 
   2698 		/* need to find the device_name_to_block_device_major stuff */
   2699 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2700 
   2701 		/* get a vnode for the raw partition of this disk */
   2702 
   2703 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2704 		if (bdevvp(dev, &vp))
   2705 			panic("RAID can't alloc vnode");
   2706 
   2707 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2708 
   2709 		if (error) {
   2710 			/* "Who cares."  Continue looking
   2711 			   for something that exists*/
   2712 			vput(vp);
   2713 			continue;
   2714 		}
   2715 
   2716 		/* Ok, the disk exists.  Go get the disklabel. */
   2717 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2718 				  FREAD, NOCRED, 0);
   2719 		if (error) {
   2720 			/*
   2721 			 * XXX can't happen - open() would
   2722 			 * have errored out (or faked up one)
   2723 			 */
   2724 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2725 			       dv->dv_xname, 'a' + RAW_PART, error);
   2726 		}
   2727 
   2728 		/* don't need this any more.  We'll allocate it again
   2729 		   a little later if we really do... */
   2730 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2731 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2732 		vput(vp);
   2733 
   2734 		for (i=0; i < label.d_npartitions; i++) {
   2735 			/* We only support partitions marked as RAID */
   2736 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2737 				continue;
   2738 
   2739 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2740 			if (bdevvp(dev, &vp))
   2741 				panic("RAID can't alloc vnode");
   2742 
   2743 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2744 			if (error) {
   2745 				/* Whatever... */
   2746 				vput(vp);
   2747 				continue;
   2748 			}
   2749 
   2750 			good_one = 0;
   2751 
   2752 			clabel = (RF_ComponentLabel_t *)
   2753 				malloc(sizeof(RF_ComponentLabel_t),
   2754 				       M_RAIDFRAME, M_NOWAIT);
   2755 			if (clabel == NULL) {
   2756 				/* XXX CLEANUP HERE */
   2757 				printf("RAID auto config: out of memory!\n");
   2758 				return(NULL); /* XXX probably should panic? */
   2759 			}
   2760 
   2761 			if (!raidread_component_label(dev, vp, clabel)) {
   2762 				/* Got the label.  Does it look reasonable? */
   2763 				if (rf_reasonable_label(clabel) &&
   2764 				    (clabel->partitionSize <=
   2765 				     label.d_partitions[i].p_size)) {
   2766 #if DEBUG
   2767 					printf("Component on: %s%c: %d\n",
   2768 					       dv->dv_xname, 'a'+i,
   2769 					       label.d_partitions[i].p_size);
   2770 					rf_print_component_label(clabel);
   2771 #endif
   2772 					/* if it's reasonable, add it,
   2773 					   else ignore it. */
   2774 					ac = (RF_AutoConfig_t *)
   2775 						malloc(sizeof(RF_AutoConfig_t),
   2776 						       M_RAIDFRAME,
   2777 						       M_NOWAIT);
   2778 					if (ac == NULL) {
   2779 						/* XXX should panic?? */
   2780 						return(NULL);
   2781 					}
   2782 
   2783 					sprintf(ac->devname, "%s%c",
   2784 						dv->dv_xname, 'a'+i);
   2785 					ac->dev = dev;
   2786 					ac->vp = vp;
   2787 					ac->clabel = clabel;
   2788 					ac->next = ac_list;
   2789 					ac_list = ac;
   2790 					good_one = 1;
   2791 				}
   2792 			}
   2793 			if (!good_one) {
   2794 				/* cleanup */
   2795 				free(clabel, M_RAIDFRAME);
   2796 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2797 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2798 				vput(vp);
   2799 			}
   2800 		}
   2801 	}
   2802 	return(ac_list);
   2803 }
   2804 
   2805 static int
   2806 rf_reasonable_label(clabel)
   2807 	RF_ComponentLabel_t *clabel;
   2808 {
   2809 
   2810 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2811 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2812 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2813 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2814 	    clabel->row >=0 &&
   2815 	    clabel->column >= 0 &&
   2816 	    clabel->num_rows > 0 &&
   2817 	    clabel->num_columns > 0 &&
   2818 	    clabel->row < clabel->num_rows &&
   2819 	    clabel->column < clabel->num_columns &&
   2820 	    clabel->blockSize > 0 &&
   2821 	    clabel->numBlocks > 0) {
   2822 		/* label looks reasonable enough... */
   2823 		return(1);
   2824 	}
   2825 	return(0);
   2826 }
   2827 
   2828 
#if 0
/*
 * Debugging aid (compiled out by the surrounding #if 0): print the
 * fields of a component label, one group per line.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	const char *is_clean;
	const char *is_auto;
	const char *has_root;

	/* Flags are shown as Yes/No: any non-zero value prints "Yes". */
	is_clean = clabel->clean ? "Yes" : "No";
	is_auto = clabel->autoconfigure ? "Yes" : "No";
	has_root = clabel->root_partition ? "Yes" : "No";

	/* position of this component and geometry of the set */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);

	/* identity of the set */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", is_clean, clabel->status);

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);

	/* autoconfiguration data */
	printf("   Autoconfig: %s\n", is_auto);
	printf("   Contains root partition: %s\n", has_root);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   2857 
   2858 RF_ConfigSet_t *
   2859 rf_create_auto_sets(ac_list)
   2860 	RF_AutoConfig_t *ac_list;
   2861 {
   2862 	RF_AutoConfig_t *ac;
   2863 	RF_ConfigSet_t *config_sets;
   2864 	RF_ConfigSet_t *cset;
   2865 	RF_AutoConfig_t *ac_next;
   2866 
   2867 
   2868 	config_sets = NULL;
   2869 
   2870 	/* Go through the AutoConfig list, and figure out which components
   2871 	   belong to what sets.  */
   2872 	ac = ac_list;
   2873 	while(ac!=NULL) {
   2874 		/* we're going to putz with ac->next, so save it here
   2875 		   for use at the end of the loop */
   2876 		ac_next = ac->next;
   2877 
   2878 		if (config_sets == NULL) {
   2879 			/* will need at least this one... */
   2880 			config_sets = (RF_ConfigSet_t *)
   2881 				malloc(sizeof(RF_ConfigSet_t),
   2882 				       M_RAIDFRAME, M_NOWAIT);
   2883 			if (config_sets == NULL) {
   2884 				panic("rf_create_auto_sets: No memory!\n");
   2885 			}
   2886 			/* this one is easy :) */
   2887 			config_sets->ac = ac;
   2888 			config_sets->next = NULL;
   2889 			config_sets->rootable = 0;
   2890 			ac->next = NULL;
   2891 		} else {
   2892 			/* which set does this component fit into? */
   2893 			cset = config_sets;
   2894 			while(cset!=NULL) {
   2895 				if (rf_does_it_fit(cset, ac)) {
   2896 					/* looks like it matches... */
   2897 					ac->next = cset->ac;
   2898 					cset->ac = ac;
   2899 					break;
   2900 				}
   2901 				cset = cset->next;
   2902 			}
   2903 			if (cset==NULL) {
   2904 				/* didn't find a match above... new set..*/
   2905 				cset = (RF_ConfigSet_t *)
   2906 					malloc(sizeof(RF_ConfigSet_t),
   2907 					       M_RAIDFRAME, M_NOWAIT);
   2908 				if (cset == NULL) {
   2909 					panic("rf_create_auto_sets: No memory!\n");
   2910 				}
   2911 				cset->ac = ac;
   2912 				ac->next = NULL;
   2913 				cset->next = config_sets;
   2914 				cset->rootable = 0;
   2915 				config_sets = cset;
   2916 			}
   2917 		}
   2918 		ac = ac_next;
   2919 	}
   2920 
   2921 
   2922 	return(config_sets);
   2923 }
   2924 
   2925 static int
   2926 rf_does_it_fit(cset, ac)
   2927 	RF_ConfigSet_t *cset;
   2928 	RF_AutoConfig_t *ac;
   2929 {
   2930 	RF_ComponentLabel_t *clabel1, *clabel2;
   2931 
   2932 	/* If this one matches the *first* one in the set, that's good
   2933 	   enough, since the other members of the set would have been
   2934 	   through here too... */
   2935 	/* note that we are not checking partitionSize here..
   2936 
   2937 	   Note that we are also not checking the mod_counters here.
   2938 	   If everything else matches execpt the mod_counter, that's
   2939 	   good enough for this test.  We will deal with the mod_counters
   2940 	   a little later in the autoconfiguration process.
   2941 
   2942 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2943 
   2944 	   The reason we don't check for this is that failed disks
   2945 	   will have lower modification counts.  If those disks are
   2946 	   not added to the set they used to belong to, then they will
   2947 	   form their own set, which may result in 2 different sets,
   2948 	   for example, competing to be configured at raid0, and
   2949 	   perhaps competing to be the root filesystem set.  If the
   2950 	   wrong ones get configured, or both attempt to become /,
   2951 	   weird behaviour and or serious lossage will occur.  Thus we
   2952 	   need to bring them into the fold here, and kick them out at
   2953 	   a later point.
   2954 
   2955 	*/
   2956 
   2957 	clabel1 = cset->ac->clabel;
   2958 	clabel2 = ac->clabel;
   2959 	if ((clabel1->version == clabel2->version) &&
   2960 	    (clabel1->serial_number == clabel2->serial_number) &&
   2961 	    (clabel1->num_rows == clabel2->num_rows) &&
   2962 	    (clabel1->num_columns == clabel2->num_columns) &&
   2963 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2964 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2965 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2966 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2967 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2968 	    (clabel1->blockSize == clabel2->blockSize) &&
   2969 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2970 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2971 	    (clabel1->root_partition == clabel2->root_partition) &&
   2972 	    (clabel1->last_unit == clabel2->last_unit) &&
   2973 	    (clabel1->config_order == clabel2->config_order)) {
   2974 		/* if it get's here, it almost *has* to be a match */
   2975 	} else {
   2976 		/* it's not consistent with somebody in the set..
   2977 		   punt */
   2978 		return(0);
   2979 	}
   2980 	/* all was fine.. it must fit... */
   2981 	return(1);
   2982 }
   2983 
   2984 int
   2985 rf_have_enough_components(cset)
   2986 	RF_ConfigSet_t *cset;
   2987 {
   2988 	RF_AutoConfig_t *ac;
   2989 	RF_AutoConfig_t *auto_config;
   2990 	RF_ComponentLabel_t *clabel;
   2991 	int r,c;
   2992 	int num_rows;
   2993 	int num_cols;
   2994 	int num_missing;
   2995 	int mod_counter;
   2996 	int mod_counter_found;
   2997 	int even_pair_failed;
   2998 	char parity_type;
   2999 
   3000 
   3001 	/* check to see that we have enough 'live' components
   3002 	   of this set.  If so, we can configure it if necessary */
   3003 
   3004 	num_rows = cset->ac->clabel->num_rows;
   3005 	num_cols = cset->ac->clabel->num_columns;
   3006 	parity_type = cset->ac->clabel->parityConfig;
   3007 
   3008 	/* XXX Check for duplicate components!?!?!? */
   3009 
   3010 	/* Determine what the mod_counter is supposed to be for this set. */
   3011 
   3012 	mod_counter_found = 0;
   3013 	mod_counter = 0;
   3014 	ac = cset->ac;
   3015 	while(ac!=NULL) {
   3016 		if (mod_counter_found==0) {
   3017 			mod_counter = ac->clabel->mod_counter;
   3018 			mod_counter_found = 1;
   3019 		} else {
   3020 			if (ac->clabel->mod_counter > mod_counter) {
   3021 				mod_counter = ac->clabel->mod_counter;
   3022 			}
   3023 		}
   3024 		ac = ac->next;
   3025 	}
   3026 
   3027 	num_missing = 0;
   3028 	auto_config = cset->ac;
   3029 
   3030 	for(r=0; r<num_rows; r++) {
   3031 		even_pair_failed = 0;
   3032 		for(c=0; c<num_cols; c++) {
   3033 			ac = auto_config;
   3034 			while(ac!=NULL) {
   3035 				if ((ac->clabel->row == r) &&
   3036 				    (ac->clabel->column == c) &&
   3037 				    (ac->clabel->mod_counter == mod_counter)) {
   3038 					/* it's this one... */
   3039 #if DEBUG
   3040 					printf("Found: %s at %d,%d\n",
   3041 					       ac->devname,r,c);
   3042 #endif
   3043 					break;
   3044 				}
   3045 				ac=ac->next;
   3046 			}
   3047 			if (ac==NULL) {
   3048 				/* Didn't find one here! */
   3049 				/* special case for RAID 1, especially
   3050 				   where there are more than 2
   3051 				   components (where RAIDframe treats
   3052 				   things a little differently :( ) */
   3053 				if (parity_type == '1') {
   3054 					if (c%2 == 0) { /* even component */
   3055 						even_pair_failed = 1;
   3056 					} else { /* odd component.  If
   3057                                                     we're failed, and
   3058                                                     so is the even
   3059                                                     component, it's
   3060                                                     "Good Night, Charlie" */
   3061 						if (even_pair_failed == 1) {
   3062 							return(0);
   3063 						}
   3064 					}
   3065 				} else {
   3066 					/* normal accounting */
   3067 					num_missing++;
   3068 				}
   3069 			}
   3070 			if ((parity_type == '1') && (c%2 == 1)) {
   3071 				/* Just did an even component, and we didn't
   3072 				   bail.. reset the even_pair_failed flag,
   3073 				   and go on to the next component.... */
   3074 				even_pair_failed = 0;
   3075 			}
   3076 		}
   3077 	}
   3078 
   3079 	clabel = cset->ac->clabel;
   3080 
   3081 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3082 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3083 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3084 		/* XXX this needs to be made *much* more general */
   3085 		/* Too many failures */
   3086 		return(0);
   3087 	}
   3088 	/* otherwise, all is well, and we've got enough to take a kick
   3089 	   at autoconfiguring this set */
   3090 	return(1);
   3091 }
   3092 
   3093 void
   3094 rf_create_configuration(ac,config,raidPtr)
   3095 	RF_AutoConfig_t *ac;
   3096 	RF_Config_t *config;
   3097 	RF_Raid_t *raidPtr;
   3098 {
   3099 	RF_ComponentLabel_t *clabel;
   3100 	int i;
   3101 
   3102 	clabel = ac->clabel;
   3103 
   3104 	/* 1. Fill in the common stuff */
   3105 	config->numRow = clabel->num_rows;
   3106 	config->numCol = clabel->num_columns;
   3107 	config->numSpare = 0; /* XXX should this be set here? */
   3108 	config->sectPerSU = clabel->sectPerSU;
   3109 	config->SUsPerPU = clabel->SUsPerPU;
   3110 	config->SUsPerRU = clabel->SUsPerRU;
   3111 	config->parityConfig = clabel->parityConfig;
   3112 	/* XXX... */
   3113 	strcpy(config->diskQueueType,"fifo");
   3114 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3115 	config->layoutSpecificSize = 0; /* XXX ?? */
   3116 
   3117 	while(ac!=NULL) {
   3118 		/* row/col values will be in range due to the checks
   3119 		   in reasonable_label() */
   3120 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3121 		       ac->devname);
   3122 		ac = ac->next;
   3123 	}
   3124 
   3125 	for(i=0;i<RF_MAXDBGV;i++) {
   3126 		config->debugVars[i][0] = NULL;
   3127 	}
   3128 }
   3129 
   3130 int
   3131 rf_set_autoconfig(raidPtr, new_value)
   3132 	RF_Raid_t *raidPtr;
   3133 	int new_value;
   3134 {
   3135 	RF_ComponentLabel_t clabel;
   3136 	struct vnode *vp;
   3137 	dev_t dev;
   3138 	int row, column;
   3139 
   3140 	raidPtr->autoconfigure = new_value;
   3141 	for(row=0; row<raidPtr->numRow; row++) {
   3142 		for(column=0; column<raidPtr->numCol; column++) {
   3143 			if (raidPtr->Disks[row][column].status ==
   3144 			    rf_ds_optimal) {
   3145 				dev = raidPtr->Disks[row][column].dev;
   3146 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3147 				raidread_component_label(dev, vp, &clabel);
   3148 				clabel.autoconfigure = new_value;
   3149 				raidwrite_component_label(dev, vp, &clabel);
   3150 			}
   3151 		}
   3152 	}
   3153 	return(new_value);
   3154 }
   3155 
   3156 int
   3157 rf_set_rootpartition(raidPtr, new_value)
   3158 	RF_Raid_t *raidPtr;
   3159 	int new_value;
   3160 {
   3161 	RF_ComponentLabel_t clabel;
   3162 	struct vnode *vp;
   3163 	dev_t dev;
   3164 	int row, column;
   3165 
   3166 	raidPtr->root_partition = new_value;
   3167 	for(row=0; row<raidPtr->numRow; row++) {
   3168 		for(column=0; column<raidPtr->numCol; column++) {
   3169 			if (raidPtr->Disks[row][column].status ==
   3170 			    rf_ds_optimal) {
   3171 				dev = raidPtr->Disks[row][column].dev;
   3172 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3173 				raidread_component_label(dev, vp, &clabel);
   3174 				clabel.root_partition = new_value;
   3175 				raidwrite_component_label(dev, vp, &clabel);
   3176 			}
   3177 		}
   3178 	}
   3179 	return(new_value);
   3180 }
   3181 
   3182 void
   3183 rf_release_all_vps(cset)
   3184 	RF_ConfigSet_t *cset;
   3185 {
   3186 	RF_AutoConfig_t *ac;
   3187 
   3188 	ac = cset->ac;
   3189 	while(ac!=NULL) {
   3190 		/* Close the vp, and give it back */
   3191 		if (ac->vp) {
   3192 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3193 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3194 			vput(ac->vp);
   3195 			ac->vp = NULL;
   3196 		}
   3197 		ac = ac->next;
   3198 	}
   3199 }
   3200 
   3201 
   3202 void
   3203 rf_cleanup_config_set(cset)
   3204 	RF_ConfigSet_t *cset;
   3205 {
   3206 	RF_AutoConfig_t *ac;
   3207 	RF_AutoConfig_t *next_ac;
   3208 
   3209 	ac = cset->ac;
   3210 	while(ac!=NULL) {
   3211 		next_ac = ac->next;
   3212 		/* nuke the label */
   3213 		free(ac->clabel, M_RAIDFRAME);
   3214 		/* cleanup the config structure */
   3215 		free(ac, M_RAIDFRAME);
   3216 		/* "next.." */
   3217 		ac = next_ac;
   3218 	}
   3219 	/* and, finally, nuke the config set */
   3220 	free(cset, M_RAIDFRAME);
   3221 }
   3222 
   3223 
   3224 void
   3225 raid_init_component_label(raidPtr, clabel)
   3226 	RF_Raid_t *raidPtr;
   3227 	RF_ComponentLabel_t *clabel;
   3228 {
   3229 	/* current version number */
   3230 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3231 	clabel->serial_number = raidPtr->serial_number;
   3232 	clabel->mod_counter = raidPtr->mod_counter;
   3233 	clabel->num_rows = raidPtr->numRow;
   3234 	clabel->num_columns = raidPtr->numCol;
   3235 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3236 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3237 
   3238 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3239 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3240 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3241 
   3242 	clabel->blockSize = raidPtr->bytesPerSector;
   3243 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3244 
   3245 	/* XXX not portable */
   3246 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3247 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3248 	clabel->autoconfigure = raidPtr->autoconfigure;
   3249 	clabel->root_partition = raidPtr->root_partition;
   3250 	clabel->last_unit = raidPtr->raidid;
   3251 	clabel->config_order = raidPtr->config_order;
   3252 }
   3253 
   3254 int
   3255 rf_auto_config_set(cset,unit)
   3256 	RF_ConfigSet_t *cset;
   3257 	int *unit;
   3258 {
   3259 	RF_Raid_t *raidPtr;
   3260 	RF_Config_t *config;
   3261 	int raidID;
   3262 	int retcode;
   3263 
   3264 #if DEBUG
   3265 	printf("RAID autoconfigure\n");
   3266 #endif
   3267 
   3268 	retcode = 0;
   3269 	*unit = -1;
   3270 
   3271 	/* 1. Create a config structure */
   3272 
   3273 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3274 				       M_RAIDFRAME,
   3275 				       M_NOWAIT);
   3276 	if (config==NULL) {
   3277 		printf("Out of mem!?!?\n");
   3278 				/* XXX do something more intelligent here. */
   3279 		return(1);
   3280 	}
   3281 
   3282 	memset(config, 0, sizeof(RF_Config_t));
   3283 
   3284 	/*
   3285 	   2. Figure out what RAID ID this one is supposed to live at
   3286 	   See if we can get the same RAID dev that it was configured
   3287 	   on last time..
   3288 	*/
   3289 
   3290 	raidID = cset->ac->clabel->last_unit;
   3291 	if ((raidID < 0) || (raidID >= numraid)) {
   3292 		/* let's not wander off into lala land. */
   3293 		raidID = numraid - 1;
   3294 	}
   3295 	if (raidPtrs[raidID]->valid != 0) {
   3296 
   3297 		/*
   3298 		   Nope... Go looking for an alternative...
   3299 		   Start high so we don't immediately use raid0 if that's
   3300 		   not taken.
   3301 		*/
   3302 
   3303 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3304 			if (raidPtrs[raidID]->valid == 0) {
   3305 				/* can use this one! */
   3306 				break;
   3307 			}
   3308 		}
   3309 	}
   3310 
   3311 	if (raidID < 0) {
   3312 		/* punt... */
   3313 		printf("Unable to auto configure this set!\n");
   3314 		printf("(Out of RAID devs!)\n");
   3315 		return(1);
   3316 	}
   3317 
   3318 #if DEBUG
   3319 	printf("Configuring raid%d:\n",raidID);
   3320 #endif
   3321 
   3322 	raidPtr = raidPtrs[raidID];
   3323 
   3324 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3325 	raidPtr->raidid = raidID;
   3326 	raidPtr->openings = RAIDOUTSTANDING;
   3327 
   3328 	/* 3. Build the configuration structure */
   3329 	rf_create_configuration(cset->ac, config, raidPtr);
   3330 
   3331 	/* 4. Do the configuration */
   3332 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3333 
   3334 	if (retcode == 0) {
   3335 
   3336 		raidinit(raidPtrs[raidID]);
   3337 
   3338 		rf_markalldirty(raidPtrs[raidID]);
   3339 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3340 		if (cset->ac->clabel->root_partition==1) {
   3341 			/* everything configured just fine.  Make a note
   3342 			   that this set is eligible to be root. */
   3343 			cset->rootable = 1;
   3344 			/* XXX do this here? */
   3345 			raidPtrs[raidID]->root_partition = 1;
   3346 		}
   3347 	}
   3348 
   3349 	/* 5. Cleanup */
   3350 	free(config, M_RAIDFRAME);
   3351 
   3352 	*unit = raidID;
   3353 	return(retcode);
   3354 }
   3355 
   3356 void
   3357 rf_disk_unbusy(desc)
   3358 	RF_RaidAccessDesc_t *desc;
   3359 {
   3360 	struct buf *bp;
   3361 
   3362 	bp = (struct buf *)desc->bp;
   3363 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3364 			    (bp->b_bcount - bp->b_resid));
   3365 }
   3366