Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.117.6.3
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.117.6.3 2002/07/15 10:35:50 gehenna Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.3 2002/07/15 10:35:50 gehenna Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_acctrace.h"
    149 #include "rf_etimer.h"
    150 #include "rf_general.h"
    151 #include "rf_debugMem.h"
    152 #include "rf_kintf.h"
    153 #include "rf_options.h"
    154 #include "rf_driver.h"
    155 #include "rf_parityscan.h"
    156 #include "rf_debugprint.h"
    157 #include "rf_threadstuff.h"
    158 
/* Debug verbosity for db1_printf(); output is produced only when > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.  The argument is a
 * complete, parenthesized printf argument list.
 *
 * Both variants expand to a single do { } while (0) statement, so a call
 * followed by ';' behaves like any ordinary statement -- in particular it
 * can be the unbraced body of an if that has an else branch without
 * capturing the else (DEBUG) or breaking the syntax (!DEBUG).
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    166 
    167 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    168 
    169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    170 
    171 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    172 						 * spare table */
    173 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    174 						 * installation process */
    175 
    176 /* prototypes */
    177 static void KernelWakeupFunc(struct buf * bp);
    178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    179 		   dev_t dev, RF_SectorNum_t startSect,
    180 		   RF_SectorCount_t numSect, caddr_t buf,
    181 		   void (*cbFunc) (struct buf *), void *cbArg,
    182 		   int logBytesPerSector, struct proc * b_proc);
    183 static void raidinit(RF_Raid_t *);
    184 
    185 void raidattach(int);
    186 
    187 dev_type_open(raidopen);
    188 dev_type_close(raidclose);
    189 dev_type_read(raidread);
    190 dev_type_write(raidwrite);
    191 dev_type_ioctl(raidioctl);
    192 dev_type_strategy(raidstrategy);
    193 dev_type_dump(raiddump);
    194 dev_type_size(raidsize);
    195 
    196 const struct bdevsw raid_bdevsw = {
    197 	raidopen, raidclose, raidstrategy, raidioctl,
    198 	raiddump, raidsize, D_DISK
    199 };
    200 
    201 const struct cdevsw raid_cdevsw = {
    202 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    203 	nostop, notty, nopoll, nommap, D_DISK
    204 };
    205 
    206 /*
    207  * Pilfered from ccd.c
    208  */
    209 
    210 struct raidbuf {
    211 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    212 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    213 	int     rf_flags;	/* misc. flags */
    214 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    215 };
    216 
    217 /* component buffer pool */
    218 struct pool raidframe_cbufpool;
    219 
    220 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    221 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    222 
    223 /* XXX Not sure if the following should be replacing the raidPtrs above,
    224    or if it should be used in conjunction with that...
    225 */
    226 
    227 struct raid_softc {
    228 	int     sc_flags;	/* flags */
    229 	int     sc_cflags;	/* configuration flags */
    230 	size_t  sc_size;        /* size of the raid device */
    231 	char    sc_xname[20];	/* XXX external name */
    232 	struct disk sc_dkdev;	/* generic disk device info */
    233 	struct buf_queue buf_queue;	/* used for the device queue */
    234 };
    235 /* sc_flags */
    236 #define RAIDF_INITED	0x01	/* unit has been initialized */
    237 #define RAIDF_WLABEL	0x02	/* label area is writable */
    238 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    239 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    240 #define RAIDF_LOCKED	0x80	/* unit is locked */
    241 
    242 #define	raidunit(x)	DISKUNIT(x)
    243 int numraid = 0;
    244 
/*
 * RAIDOUTSTANDING is the number of simultaneous I/Os allowed to a RAID
 * device.  Be aware that large values can let the driver consume a lot of
 * kernel memory, especially on writes and on degraded-mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is re-used immediately, that's still 128K, which when
 * multiplied by say 10 requests, is 1280K, *on top* of the 640K of
 * incoming data.
 *
 * In degraded mode, a 64K read on the above setup may require data
 * reconstruction, which will require *all* of the 4 remaining disks to
 * participate -- 4 * 32K/disk == 128K again.
 *
 * The default below applies only when the build has not already defined
 * RAIDOUTSTANDING.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The raw partition (RAW_PART) of the unit that 'dev' belongs to. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    268 
    269 /* declared here, and made public, for the benefit of KVM stuff.. */
    270 struct raid_softc *raid_softc;
    271 
    272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    273 				     struct disklabel *);
    274 static void raidgetdisklabel(dev_t);
    275 static void raidmakedisklabel(struct raid_softc *);
    276 
    277 static int raidlock(struct raid_softc *);
    278 static void raidunlock(struct raid_softc *);
    279 
    280 static void rf_markalldirty(RF_Raid_t *);
    281 void rf_mountroot_hook(struct device *);
    282 
    283 struct device *raidrootdev;
    284 
    285 void rf_ReconThread(struct rf_recon_req *);
    286 /* XXX what I want is: */
    287 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    288 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    289 void rf_CopybackThread(RF_Raid_t *raidPtr);
    290 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    291 void rf_buildroothack(void *);
    292 
    293 RF_AutoConfig_t *rf_find_raid_components(void);
    294 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    295 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    296 static int rf_reasonable_label(RF_ComponentLabel_t *);
    297 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    298 int rf_set_autoconfig(RF_Raid_t *, int);
    299 int rf_set_rootpartition(RF_Raid_t *, int);
    300 void rf_release_all_vps(RF_ConfigSet_t *);
    301 void rf_cleanup_config_set(RF_ConfigSet_t *);
    302 int rf_have_enough_components(RF_ConfigSet_t *);
    303 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    304 
    305 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    306 				  allow autoconfig to take place.
    307 			          Note that this is overridden by having
    308 			          RAID_AUTOCONFIG as an option in the
    309 			          kernel config file.  */
    310 
    311 void
    312 raidattach(num)
    313 	int     num;
    314 {
    315 	int raidID;
    316 	int i, rc;
    317 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    318 	RF_ConfigSet_t *config_sets;
    319 
    320 #ifdef DEBUG
    321 	printf("raidattach: Asked for %d units\n", num);
    322 #endif
    323 
    324 	if (num <= 0) {
    325 #ifdef DIAGNOSTIC
    326 		panic("raidattach: count <= 0");
    327 #endif
    328 		return;
    329 	}
    330 	/* This is where all the initialization stuff gets done. */
    331 
    332 	numraid = num;
    333 
    334 	/* Make some space for requested number of units... */
    335 
    336 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    337 	if (raidPtrs == NULL) {
    338 		panic("raidPtrs is NULL!!\n");
    339 	}
    340 
    341 	/* Initialize the component buffer pool. */
    342 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    343 	    0, 0, "raidpl", NULL);
    344 
    345 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    346 	if (rc) {
    347 		RF_PANIC();
    348 	}
    349 
    350 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    351 
    352 	for (i = 0; i < num; i++)
    353 		raidPtrs[i] = NULL;
    354 	rc = rf_BootRaidframe();
    355 	if (rc == 0)
    356 		printf("Kernelized RAIDframe activated\n");
    357 	else
    358 		panic("Serious error booting RAID!!\n");
    359 
    360 	/* put together some datastructures like the CCD device does.. This
    361 	 * lets us lock the device and what-not when it gets opened. */
    362 
    363 	raid_softc = (struct raid_softc *)
    364 		malloc(num * sizeof(struct raid_softc),
    365 		       M_RAIDFRAME, M_NOWAIT);
    366 	if (raid_softc == NULL) {
    367 		printf("WARNING: no memory for RAIDframe driver\n");
    368 		return;
    369 	}
    370 
    371 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    372 
    373 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    374 					      M_RAIDFRAME, M_NOWAIT);
    375 	if (raidrootdev == NULL) {
    376 		panic("No memory for RAIDframe driver!!?!?!\n");
    377 	}
    378 
    379 	for (raidID = 0; raidID < num; raidID++) {
    380 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    381 
    382 		raidrootdev[raidID].dv_class  = DV_DISK;
    383 		raidrootdev[raidID].dv_cfdata = NULL;
    384 		raidrootdev[raidID].dv_unit   = raidID;
    385 		raidrootdev[raidID].dv_parent = NULL;
    386 		raidrootdev[raidID].dv_flags  = 0;
    387 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    388 
    389 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    390 			  (RF_Raid_t *));
    391 		if (raidPtrs[raidID] == NULL) {
    392 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    393 			numraid = raidID;
    394 			return;
    395 		}
    396 	}
    397 
    398 #ifdef RAID_AUTOCONFIG
    399 	raidautoconfig = 1;
    400 #endif
    401 
    402 if (raidautoconfig) {
    403 	/* 1. locate all RAID components on the system */
    404 
    405 #if DEBUG
    406 	printf("Searching for raid components...\n");
    407 #endif
    408 	ac_list = rf_find_raid_components();
    409 
    410 	/* 2. sort them into their respective sets */
    411 
    412 	config_sets = rf_create_auto_sets(ac_list);
    413 
    414 	/* 3. evaluate each set and configure the valid ones
    415 	   This gets done in rf_buildroothack() */
    416 
    417 	/* schedule the creation of the thread to do the
    418 	   "/ on RAID" stuff */
    419 
    420 	kthread_create(rf_buildroothack,config_sets);
    421 
    422 #if 0
    423 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    424 #endif
    425 }
    426 
    427 }
    428 
    429 void
    430 rf_buildroothack(arg)
    431 	void *arg;
    432 {
    433 	RF_ConfigSet_t *config_sets = arg;
    434 	RF_ConfigSet_t *cset;
    435 	RF_ConfigSet_t *next_cset;
    436 	int retcode;
    437 	int raidID;
    438 	int rootID;
    439 	int num_root;
    440 
    441 	rootID = 0;
    442 	num_root = 0;
    443 	cset = config_sets;
    444 	while(cset != NULL ) {
    445 		next_cset = cset->next;
    446 		if (rf_have_enough_components(cset) &&
    447 		    cset->ac->clabel->autoconfigure==1) {
    448 			retcode = rf_auto_config_set(cset,&raidID);
    449 			if (!retcode) {
    450 				if (cset->rootable) {
    451 					rootID = raidID;
    452 					num_root++;
    453 				}
    454 			} else {
    455 				/* The autoconfig didn't work :( */
    456 #if DEBUG
    457 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    458 #endif
    459 				rf_release_all_vps(cset);
    460 			}
    461 		} else {
    462 			/* we're not autoconfiguring this set...
    463 			   release the associated resources */
    464 			rf_release_all_vps(cset);
    465 		}
    466 		/* cleanup */
    467 		rf_cleanup_config_set(cset);
    468 		cset = next_cset;
    469 	}
    470 
    471 	/* we found something bootable... */
    472 
    473 	if (num_root == 1) {
    474 		booted_device = &raidrootdev[rootID];
    475 	} else if (num_root > 1) {
    476 		/* we can't guess.. require the user to answer... */
    477 		boothowto |= RB_ASKNAME;
    478 	}
    479 }
    480 
    481 
    482 int
    483 raidsize(dev)
    484 	dev_t   dev;
    485 {
    486 	struct raid_softc *rs;
    487 	struct disklabel *lp;
    488 	int     part, unit, omask, size;
    489 
    490 	unit = raidunit(dev);
    491 	if (unit >= numraid)
    492 		return (-1);
    493 	rs = &raid_softc[unit];
    494 
    495 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    496 		return (-1);
    497 
    498 	part = DISKPART(dev);
    499 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    500 	lp = rs->sc_dkdev.dk_label;
    501 
    502 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    503 		return (-1);
    504 
    505 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    506 		size = -1;
    507 	else
    508 		size = lp->d_partitions[part].p_size *
    509 		    (lp->d_secsize / DEV_BSIZE);
    510 
    511 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    512 		return (-1);
    513 
    514 	return (size);
    515 
    516 }
    517 
    518 int
    519 raiddump(dev, blkno, va, size)
    520 	dev_t   dev;
    521 	daddr_t blkno;
    522 	caddr_t va;
    523 	size_t  size;
    524 {
    525 	/* Not implemented. */
    526 	return ENXIO;
    527 }
    528 /* ARGSUSED */
    529 int
    530 raidopen(dev, flags, fmt, p)
    531 	dev_t   dev;
    532 	int     flags, fmt;
    533 	struct proc *p;
    534 {
    535 	int     unit = raidunit(dev);
    536 	struct raid_softc *rs;
    537 	struct disklabel *lp;
    538 	int     part, pmask;
    539 	int     error = 0;
    540 
    541 	if (unit >= numraid)
    542 		return (ENXIO);
    543 	rs = &raid_softc[unit];
    544 
    545 	if ((error = raidlock(rs)) != 0)
    546 		return (error);
    547 	lp = rs->sc_dkdev.dk_label;
    548 
    549 	part = DISKPART(dev);
    550 	pmask = (1 << part);
    551 
    552 	db1_printf(("Opening raid device number: %d partition: %d\n",
    553 		unit, part));
    554 
    555 
    556 	if ((rs->sc_flags & RAIDF_INITED) &&
    557 	    (rs->sc_dkdev.dk_openmask == 0))
    558 		raidgetdisklabel(dev);
    559 
    560 	/* make sure that this partition exists */
    561 
    562 	if (part != RAW_PART) {
    563 		db1_printf(("Not a raw partition..\n"));
    564 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    565 		    ((part >= lp->d_npartitions) ||
    566 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    567 			error = ENXIO;
    568 			raidunlock(rs);
    569 			db1_printf(("Bailing out...\n"));
    570 			return (error);
    571 		}
    572 	}
    573 	/* Prevent this unit from being unconfigured while open. */
    574 	switch (fmt) {
    575 	case S_IFCHR:
    576 		rs->sc_dkdev.dk_copenmask |= pmask;
    577 		break;
    578 
    579 	case S_IFBLK:
    580 		rs->sc_dkdev.dk_bopenmask |= pmask;
    581 		break;
    582 	}
    583 
    584 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    585 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    586 		/* First one... mark things as dirty... Note that we *MUST*
    587 		 have done a configure before this.  I DO NOT WANT TO BE
    588 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    589 		 THAT THEY BELONG TOGETHER!!!!! */
    590 		/* XXX should check to see if we're only open for reading
    591 		   here... If so, we needn't do this, but then need some
    592 		   other way of keeping track of what's happened.. */
    593 
    594 		rf_markalldirty( raidPtrs[unit] );
    595 	}
    596 
    597 
    598 	rs->sc_dkdev.dk_openmask =
    599 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    600 
    601 	raidunlock(rs);
    602 
    603 	return (error);
    604 
    605 
    606 }
    607 /* ARGSUSED */
    608 int
    609 raidclose(dev, flags, fmt, p)
    610 	dev_t   dev;
    611 	int     flags, fmt;
    612 	struct proc *p;
    613 {
    614 	int     unit = raidunit(dev);
    615 	struct raid_softc *rs;
    616 	int     error = 0;
    617 	int     part;
    618 
    619 	if (unit >= numraid)
    620 		return (ENXIO);
    621 	rs = &raid_softc[unit];
    622 
    623 	if ((error = raidlock(rs)) != 0)
    624 		return (error);
    625 
    626 	part = DISKPART(dev);
    627 
    628 	/* ...that much closer to allowing unconfiguration... */
    629 	switch (fmt) {
    630 	case S_IFCHR:
    631 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    632 		break;
    633 
    634 	case S_IFBLK:
    635 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    636 		break;
    637 	}
    638 	rs->sc_dkdev.dk_openmask =
    639 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    640 
    641 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    642 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    643 		/* Last one... device is not unconfigured yet.
    644 		   Device shutdown has taken care of setting the
    645 		   clean bits if RAIDF_INITED is not set
    646 		   mark things as clean... */
    647 #if 0
    648 		printf("Last one on raid%d.  Updating status.\n",unit);
    649 #endif
    650 		rf_update_component_labels(raidPtrs[unit],
    651 						 RF_FINAL_COMPONENT_UPDATE);
    652 		if (doing_shutdown) {
    653 			/* last one, and we're going down, so
    654 			   lights out for this RAID set too. */
    655 			error = rf_Shutdown(raidPtrs[unit]);
    656 
    657 			/* It's no longer initialized... */
    658 			rs->sc_flags &= ~RAIDF_INITED;
    659 
    660 			/* Detach the disk. */
    661 			disk_detach(&rs->sc_dkdev);
    662 		}
    663 	}
    664 
    665 	raidunlock(rs);
    666 	return (0);
    667 
    668 }
    669 
    670 void
    671 raidstrategy(bp)
    672 	struct buf *bp;
    673 {
    674 	int s;
    675 
    676 	unsigned int raidID = raidunit(bp->b_dev);
    677 	RF_Raid_t *raidPtr;
    678 	struct raid_softc *rs = &raid_softc[raidID];
    679 	struct disklabel *lp;
    680 	int     wlabel;
    681 
    682 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    683 		bp->b_error = ENXIO;
    684 		bp->b_flags |= B_ERROR;
    685 		bp->b_resid = bp->b_bcount;
    686 		biodone(bp);
    687 		return;
    688 	}
    689 	if (raidID >= numraid || !raidPtrs[raidID]) {
    690 		bp->b_error = ENODEV;
    691 		bp->b_flags |= B_ERROR;
    692 		bp->b_resid = bp->b_bcount;
    693 		biodone(bp);
    694 		return;
    695 	}
    696 	raidPtr = raidPtrs[raidID];
    697 	if (!raidPtr->valid) {
    698 		bp->b_error = ENODEV;
    699 		bp->b_flags |= B_ERROR;
    700 		bp->b_resid = bp->b_bcount;
    701 		biodone(bp);
    702 		return;
    703 	}
    704 	if (bp->b_bcount == 0) {
    705 		db1_printf(("b_bcount is zero..\n"));
    706 		biodone(bp);
    707 		return;
    708 	}
    709 	lp = rs->sc_dkdev.dk_label;
    710 
    711 	/*
    712 	 * Do bounds checking and adjust transfer.  If there's an
    713 	 * error, the bounds check will flag that for us.
    714 	 */
    715 
    716 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    717 	if (DISKPART(bp->b_dev) != RAW_PART)
    718 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    719 			db1_printf(("Bounds check failed!!:%d %d\n",
    720 				(int) bp->b_blkno, (int) wlabel));
    721 			biodone(bp);
    722 			return;
    723 		}
    724 	s = splbio();
    725 
    726 	bp->b_resid = 0;
    727 
    728 	/* stuff it onto our queue */
    729 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    730 
    731 	raidstart(raidPtrs[raidID]);
    732 
    733 	splx(s);
    734 }
    735 /* ARGSUSED */
    736 int
    737 raidread(dev, uio, flags)
    738 	dev_t   dev;
    739 	struct uio *uio;
    740 	int     flags;
    741 {
    742 	int     unit = raidunit(dev);
    743 	struct raid_softc *rs;
    744 	int     part;
    745 
    746 	if (unit >= numraid)
    747 		return (ENXIO);
    748 	rs = &raid_softc[unit];
    749 
    750 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    751 		return (ENXIO);
    752 	part = DISKPART(dev);
    753 
    754 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    755 
    756 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    757 
    758 }
    759 /* ARGSUSED */
    760 int
    761 raidwrite(dev, uio, flags)
    762 	dev_t   dev;
    763 	struct uio *uio;
    764 	int     flags;
    765 {
    766 	int     unit = raidunit(dev);
    767 	struct raid_softc *rs;
    768 
    769 	if (unit >= numraid)
    770 		return (ENXIO);
    771 	rs = &raid_softc[unit];
    772 
    773 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    774 		return (ENXIO);
    775 	db1_printf(("raidwrite\n"));
    776 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    777 
    778 }
    779 
    780 int
    781 raidioctl(dev, cmd, data, flag, p)
    782 	dev_t   dev;
    783 	u_long  cmd;
    784 	caddr_t data;
    785 	int     flag;
    786 	struct proc *p;
    787 {
    788 	int     unit = raidunit(dev);
    789 	int     error = 0;
    790 	int     part, pmask;
    791 	struct raid_softc *rs;
    792 	RF_Config_t *k_cfg, *u_cfg;
    793 	RF_Raid_t *raidPtr;
    794 	RF_RaidDisk_t *diskPtr;
    795 	RF_AccTotals_t *totals;
    796 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    797 	u_char *specific_buf;
    798 	int retcode = 0;
    799 	int row;
    800 	int column;
    801 	int raidid;
    802 	struct rf_recon_req *rrcopy, *rr;
    803 	RF_ComponentLabel_t *clabel;
    804 	RF_ComponentLabel_t ci_label;
    805 	RF_ComponentLabel_t **clabel_ptr;
    806 	RF_SingleComponent_t *sparePtr,*componentPtr;
    807 	RF_SingleComponent_t hot_spare;
    808 	RF_SingleComponent_t component;
    809 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    810 	int i, j, d;
    811 #ifdef __HAVE_OLD_DISKLABEL
    812 	struct disklabel newlabel;
    813 #endif
    814 
    815 	if (unit >= numraid)
    816 		return (ENXIO);
    817 	rs = &raid_softc[unit];
    818 	raidPtr = raidPtrs[unit];
    819 
    820 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    821 		(int) DISKPART(dev), (int) unit, (int) cmd));
    822 
    823 	/* Must be open for writes for these commands... */
    824 	switch (cmd) {
    825 	case DIOCSDINFO:
    826 	case DIOCWDINFO:
    827 #ifdef __HAVE_OLD_DISKLABEL
    828 	case ODIOCWDINFO:
    829 	case ODIOCSDINFO:
    830 #endif
    831 	case DIOCWLABEL:
    832 		if ((flag & FWRITE) == 0)
    833 			return (EBADF);
    834 	}
    835 
    836 	/* Must be initialized for these... */
    837 	switch (cmd) {
    838 	case DIOCGDINFO:
    839 	case DIOCSDINFO:
    840 	case DIOCWDINFO:
    841 #ifdef __HAVE_OLD_DISKLABEL
    842 	case ODIOCGDINFO:
    843 	case ODIOCWDINFO:
    844 	case ODIOCSDINFO:
    845 	case ODIOCGDEFLABEL:
    846 #endif
    847 	case DIOCGPART:
    848 	case DIOCWLABEL:
    849 	case DIOCGDEFLABEL:
    850 	case RAIDFRAME_SHUTDOWN:
    851 	case RAIDFRAME_REWRITEPARITY:
    852 	case RAIDFRAME_GET_INFO:
    853 	case RAIDFRAME_RESET_ACCTOTALS:
    854 	case RAIDFRAME_GET_ACCTOTALS:
    855 	case RAIDFRAME_KEEP_ACCTOTALS:
    856 	case RAIDFRAME_GET_SIZE:
    857 	case RAIDFRAME_FAIL_DISK:
    858 	case RAIDFRAME_COPYBACK:
    859 	case RAIDFRAME_CHECK_RECON_STATUS:
    860 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    861 	case RAIDFRAME_GET_COMPONENT_LABEL:
    862 	case RAIDFRAME_SET_COMPONENT_LABEL:
    863 	case RAIDFRAME_ADD_HOT_SPARE:
    864 	case RAIDFRAME_REMOVE_HOT_SPARE:
    865 	case RAIDFRAME_INIT_LABELS:
    866 	case RAIDFRAME_REBUILD_IN_PLACE:
    867 	case RAIDFRAME_CHECK_PARITY:
    868 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    869 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    870 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    871 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    872 	case RAIDFRAME_SET_AUTOCONFIG:
    873 	case RAIDFRAME_SET_ROOT:
    874 	case RAIDFRAME_DELETE_COMPONENT:
    875 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    876 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    877 			return (ENXIO);
    878 	}
    879 
    880 	switch (cmd) {
    881 
    882 		/* configure the system */
    883 	case RAIDFRAME_CONFIGURE:
    884 
    885 		if (raidPtr->valid) {
    886 			/* There is a valid RAID set running on this unit! */
    887 			printf("raid%d: Device already configured!\n",unit);
    888 			return(EINVAL);
    889 		}
    890 
    891 		/* copy-in the configuration information */
    892 		/* data points to a pointer to the configuration structure */
    893 
    894 		u_cfg = *((RF_Config_t **) data);
    895 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    896 		if (k_cfg == NULL) {
    897 			return (ENOMEM);
    898 		}
    899 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    900 		    sizeof(RF_Config_t));
    901 		if (retcode) {
    902 			RF_Free(k_cfg, sizeof(RF_Config_t));
    903 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    904 				retcode));
    905 			return (retcode);
    906 		}
    907 		/* allocate a buffer for the layout-specific data, and copy it
    908 		 * in */
    909 		if (k_cfg->layoutSpecificSize) {
    910 			if (k_cfg->layoutSpecificSize > 10000) {
    911 				/* sanity check */
    912 				RF_Free(k_cfg, sizeof(RF_Config_t));
    913 				return (EINVAL);
    914 			}
    915 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    916 			    (u_char *));
    917 			if (specific_buf == NULL) {
    918 				RF_Free(k_cfg, sizeof(RF_Config_t));
    919 				return (ENOMEM);
    920 			}
    921 			retcode = copyin(k_cfg->layoutSpecific,
    922 			    (caddr_t) specific_buf,
    923 			    k_cfg->layoutSpecificSize);
    924 			if (retcode) {
    925 				RF_Free(k_cfg, sizeof(RF_Config_t));
    926 				RF_Free(specific_buf,
    927 					k_cfg->layoutSpecificSize);
    928 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    929 					retcode));
    930 				return (retcode);
    931 			}
    932 		} else
    933 			specific_buf = NULL;
    934 		k_cfg->layoutSpecific = specific_buf;
    935 
    936 		/* should do some kind of sanity check on the configuration.
    937 		 * Store the sum of all the bytes in the last byte? */
    938 
    939 		/* configure the system */
    940 
    941 		/*
    942 		 * Clear the entire RAID descriptor, just to make sure
    943 		 *  there is no stale data left in the case of a
    944 		 *  reconfiguration
    945 		 */
    946 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    947 		raidPtr->raidid = unit;
    948 
    949 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    950 
    951 		if (retcode == 0) {
    952 
    953 			/* allow this many simultaneous IO's to
    954 			   this RAID device */
    955 			raidPtr->openings = RAIDOUTSTANDING;
    956 
    957 			raidinit(raidPtr);
    958 			rf_markalldirty(raidPtr);
    959 		}
    960 		/* free the buffers.  No return code here. */
    961 		if (k_cfg->layoutSpecificSize) {
    962 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    963 		}
    964 		RF_Free(k_cfg, sizeof(RF_Config_t));
    965 
    966 		return (retcode);
    967 
    968 		/* shutdown the system */
    969 	case RAIDFRAME_SHUTDOWN:
    970 
    971 		if ((error = raidlock(rs)) != 0)
    972 			return (error);
    973 
    974 		/*
    975 		 * If somebody has a partition mounted, we shouldn't
    976 		 * shutdown.
    977 		 */
    978 
    979 		part = DISKPART(dev);
    980 		pmask = (1 << part);
    981 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    982 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    983 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    984 			raidunlock(rs);
    985 			return (EBUSY);
    986 		}
    987 
    988 		retcode = rf_Shutdown(raidPtr);
    989 
    990 		/* It's no longer initialized... */
    991 		rs->sc_flags &= ~RAIDF_INITED;
    992 
    993 		/* Detach the disk. */
    994 		disk_detach(&rs->sc_dkdev);
    995 
    996 		raidunlock(rs);
    997 
    998 		return (retcode);
    999 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1000 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1001 		/* need to read the component label for the disk indicated
   1002 		   by row,column in clabel */
   1003 
   1004 		/* For practice, let's get it directly from disk, rather
   1005 		   than from the in-core copy */
   1006 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1007 			   (RF_ComponentLabel_t *));
   1008 		if (clabel == NULL)
   1009 			return (ENOMEM);
   1010 
   1011 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1012 
   1013 		retcode = copyin( *clabel_ptr, clabel,
   1014 				  sizeof(RF_ComponentLabel_t));
   1015 
   1016 		if (retcode) {
   1017 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1018 			return(retcode);
   1019 		}
   1020 
   1021 		row = clabel->row;
   1022 		column = clabel->column;
   1023 
   1024 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1025 		    (column < 0) || (column >= raidPtr->numCol +
   1026 				     raidPtr->numSpare)) {
   1027 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1028 			return(EINVAL);
   1029 		}
   1030 
   1031 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1032 				raidPtr->raid_cinfo[row][column].ci_vp,
   1033 				clabel );
   1034 
   1035 		retcode = copyout((caddr_t) clabel,
   1036 				  (caddr_t) *clabel_ptr,
   1037 				  sizeof(RF_ComponentLabel_t));
   1038 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1039 		return (retcode);
   1040 
   1041 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1042 		clabel = (RF_ComponentLabel_t *) data;
   1043 
   1044 		/* XXX check the label for valid stuff... */
   1045 		/* Note that some things *should not* get modified --
   1046 		   the user should be re-initing the labels instead of
   1047 		   trying to patch things.
   1048 		   */
   1049 
   1050 		raidid = raidPtr->raidid;
   1051 		printf("raid%d: Got component label:\n", raidid);
   1052 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1053 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1054 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1055 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1056 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1057 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1058 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1059 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1060 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1061 
   1062 		row = clabel->row;
   1063 		column = clabel->column;
   1064 
   1065 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1066 		    (column < 0) || (column >= raidPtr->numCol)) {
   1067 			return(EINVAL);
   1068 		}
   1069 
   1070 		/* XXX this isn't allowed to do anything for now :-) */
   1071 
   1072 		/* XXX and before it is, we need to fill in the rest
   1073 		   of the fields!?!?!?! */
   1074 #if 0
   1075 		raidwrite_component_label(
   1076                             raidPtr->Disks[row][column].dev,
   1077 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1078 			    clabel );
   1079 #endif
   1080 		return (0);
   1081 
   1082 	case RAIDFRAME_INIT_LABELS:
   1083 		clabel = (RF_ComponentLabel_t *) data;
   1084 		/*
   1085 		   we only want the serial number from
   1086 		   the above.  We get all the rest of the information
   1087 		   from the config that was used to create this RAID
   1088 		   set.
   1089 		   */
   1090 
   1091 		raidPtr->serial_number = clabel->serial_number;
   1092 
   1093 		raid_init_component_label(raidPtr, &ci_label);
   1094 		ci_label.serial_number = clabel->serial_number;
   1095 
   1096 		for(row=0;row<raidPtr->numRow;row++) {
   1097 			ci_label.row = row;
   1098 			for(column=0;column<raidPtr->numCol;column++) {
   1099 				diskPtr = &raidPtr->Disks[row][column];
   1100 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1101 					ci_label.partitionSize = diskPtr->partitionSize;
   1102 					ci_label.column = column;
   1103 					raidwrite_component_label(
   1104 					  raidPtr->Disks[row][column].dev,
   1105 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1106 					  &ci_label );
   1107 				}
   1108 			}
   1109 		}
   1110 
   1111 		return (retcode);
   1112 	case RAIDFRAME_SET_AUTOCONFIG:
   1113 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1114 		printf("raid%d: New autoconfig value is: %d\n",
   1115 		       raidPtr->raidid, d);
   1116 		*(int *) data = d;
   1117 		return (retcode);
   1118 
   1119 	case RAIDFRAME_SET_ROOT:
   1120 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1121 		printf("raid%d: New rootpartition value is: %d\n",
   1122 		       raidPtr->raidid, d);
   1123 		*(int *) data = d;
   1124 		return (retcode);
   1125 
   1126 		/* initialize all parity */
   1127 	case RAIDFRAME_REWRITEPARITY:
   1128 
   1129 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1130 			/* Parity for RAID 0 is trivially correct */
   1131 			raidPtr->parity_good = RF_RAID_CLEAN;
   1132 			return(0);
   1133 		}
   1134 
   1135 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1136 			/* Re-write is already in progress! */
   1137 			return(EINVAL);
   1138 		}
   1139 
   1140 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1141 					   rf_RewriteParityThread,
   1142 					   raidPtr,"raid_parity");
   1143 		return (retcode);
   1144 
   1145 
   1146 	case RAIDFRAME_ADD_HOT_SPARE:
   1147 		sparePtr = (RF_SingleComponent_t *) data;
   1148 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1149 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1150 		return(retcode);
   1151 
   1152 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1153 		return(retcode);
   1154 
   1155 	case RAIDFRAME_DELETE_COMPONENT:
   1156 		componentPtr = (RF_SingleComponent_t *)data;
   1157 		memcpy( &component, componentPtr,
   1158 			sizeof(RF_SingleComponent_t));
   1159 		retcode = rf_delete_component(raidPtr, &component);
   1160 		return(retcode);
   1161 
   1162 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1163 		componentPtr = (RF_SingleComponent_t *)data;
   1164 		memcpy( &component, componentPtr,
   1165 			sizeof(RF_SingleComponent_t));
   1166 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1167 		return(retcode);
   1168 
   1169 	case RAIDFRAME_REBUILD_IN_PLACE:
   1170 
   1171 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1172 			/* Can't do this on a RAID 0!! */
   1173 			return(EINVAL);
   1174 		}
   1175 
   1176 		if (raidPtr->recon_in_progress == 1) {
   1177 			/* a reconstruct is already in progress! */
   1178 			return(EINVAL);
   1179 		}
   1180 
   1181 		componentPtr = (RF_SingleComponent_t *) data;
   1182 		memcpy( &component, componentPtr,
   1183 			sizeof(RF_SingleComponent_t));
   1184 		row = component.row;
   1185 		column = component.column;
   1186 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1187 		       row, column);
   1188 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1189 		    (column < 0) || (column >= raidPtr->numCol)) {
   1190 			return(EINVAL);
   1191 		}
   1192 
   1193 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1194 		if (rrcopy == NULL)
   1195 			return(ENOMEM);
   1196 
   1197 		rrcopy->raidPtr = (void *) raidPtr;
   1198 		rrcopy->row = row;
   1199 		rrcopy->col = column;
   1200 
   1201 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1202 					   rf_ReconstructInPlaceThread,
   1203 					   rrcopy,"raid_reconip");
   1204 		return(retcode);
   1205 
   1206 	case RAIDFRAME_GET_INFO:
   1207 		if (!raidPtr->valid)
   1208 			return (ENODEV);
   1209 		ucfgp = (RF_DeviceConfig_t **) data;
   1210 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1211 			  (RF_DeviceConfig_t *));
   1212 		if (d_cfg == NULL)
   1213 			return (ENOMEM);
   1214 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1215 		d_cfg->rows = raidPtr->numRow;
   1216 		d_cfg->cols = raidPtr->numCol;
   1217 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1218 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1219 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1220 			return (ENOMEM);
   1221 		}
   1222 		d_cfg->nspares = raidPtr->numSpare;
   1223 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1224 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1225 			return (ENOMEM);
   1226 		}
   1227 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1228 		d = 0;
   1229 		for (i = 0; i < d_cfg->rows; i++) {
   1230 			for (j = 0; j < d_cfg->cols; j++) {
   1231 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1232 				d++;
   1233 			}
   1234 		}
   1235 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1236 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1237 		}
   1238 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1239 				  sizeof(RF_DeviceConfig_t));
   1240 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1241 
   1242 		return (retcode);
   1243 
   1244 	case RAIDFRAME_CHECK_PARITY:
   1245 		*(int *) data = raidPtr->parity_good;
   1246 		return (0);
   1247 
   1248 	case RAIDFRAME_RESET_ACCTOTALS:
   1249 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1250 		return (0);
   1251 
   1252 	case RAIDFRAME_GET_ACCTOTALS:
   1253 		totals = (RF_AccTotals_t *) data;
   1254 		*totals = raidPtr->acc_totals;
   1255 		return (0);
   1256 
   1257 	case RAIDFRAME_KEEP_ACCTOTALS:
   1258 		raidPtr->keep_acc_totals = *(int *)data;
   1259 		return (0);
   1260 
   1261 	case RAIDFRAME_GET_SIZE:
   1262 		*(int *) data = raidPtr->totalSectors;
   1263 		return (0);
   1264 
   1265 		/* fail a disk & optionally start reconstruction */
   1266 	case RAIDFRAME_FAIL_DISK:
   1267 
   1268 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1269 			/* Can't do this on a RAID 0!! */
   1270 			return(EINVAL);
   1271 		}
   1272 
   1273 		rr = (struct rf_recon_req *) data;
   1274 
   1275 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1276 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1277 			return (EINVAL);
   1278 
   1279 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1280 		       unit, rr->row, rr->col);
   1281 
   1282 		/* make a copy of the recon request so that we don't rely on
   1283 		 * the user's buffer */
   1284 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1285 		if (rrcopy == NULL)
   1286 			return(ENOMEM);
   1287 		memcpy(rrcopy, rr, sizeof(*rr));
   1288 		rrcopy->raidPtr = (void *) raidPtr;
   1289 
   1290 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1291 					   rf_ReconThread,
   1292 					   rrcopy,"raid_recon");
   1293 		return (0);
   1294 
   1295 		/* invoke a copyback operation after recon on whatever disk
   1296 		 * needs it, if any */
   1297 	case RAIDFRAME_COPYBACK:
   1298 
   1299 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1300 			/* This makes no sense on a RAID 0!! */
   1301 			return(EINVAL);
   1302 		}
   1303 
   1304 		if (raidPtr->copyback_in_progress == 1) {
   1305 			/* Copyback is already in progress! */
   1306 			return(EINVAL);
   1307 		}
   1308 
   1309 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1310 					   rf_CopybackThread,
   1311 					   raidPtr,"raid_copyback");
   1312 		return (retcode);
   1313 
   1314 		/* return the percentage completion of reconstruction */
   1315 	case RAIDFRAME_CHECK_RECON_STATUS:
   1316 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1317 			/* This makes no sense on a RAID 0, so tell the
   1318 			   user it's done. */
   1319 			*(int *) data = 100;
   1320 			return(0);
   1321 		}
   1322 		row = 0; /* XXX we only consider a single row... */
   1323 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1324 			*(int *) data = 100;
   1325 		else
   1326 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1327 		return (0);
   1328 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1329 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1330 		row = 0; /* XXX we only consider a single row... */
   1331 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1332 			progressInfo.remaining = 0;
   1333 			progressInfo.completed = 100;
   1334 			progressInfo.total = 100;
   1335 		} else {
   1336 			progressInfo.total =
   1337 				raidPtr->reconControl[row]->numRUsTotal;
   1338 			progressInfo.completed =
   1339 				raidPtr->reconControl[row]->numRUsComplete;
   1340 			progressInfo.remaining = progressInfo.total -
   1341 				progressInfo.completed;
   1342 		}
   1343 		retcode = copyout((caddr_t) &progressInfo,
   1344 				  (caddr_t) *progressInfoPtr,
   1345 				  sizeof(RF_ProgressInfo_t));
   1346 		return (retcode);
   1347 
   1348 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1349 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1350 			/* This makes no sense on a RAID 0, so tell the
   1351 			   user it's done. */
   1352 			*(int *) data = 100;
   1353 			return(0);
   1354 		}
   1355 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1356 			*(int *) data = 100 *
   1357 				raidPtr->parity_rewrite_stripes_done /
   1358 				raidPtr->Layout.numStripe;
   1359 		} else {
   1360 			*(int *) data = 100;
   1361 		}
   1362 		return (0);
   1363 
   1364 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1365 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1366 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1367 			progressInfo.total = raidPtr->Layout.numStripe;
   1368 			progressInfo.completed =
   1369 				raidPtr->parity_rewrite_stripes_done;
   1370 			progressInfo.remaining = progressInfo.total -
   1371 				progressInfo.completed;
   1372 		} else {
   1373 			progressInfo.remaining = 0;
   1374 			progressInfo.completed = 100;
   1375 			progressInfo.total = 100;
   1376 		}
   1377 		retcode = copyout((caddr_t) &progressInfo,
   1378 				  (caddr_t) *progressInfoPtr,
   1379 				  sizeof(RF_ProgressInfo_t));
   1380 		return (retcode);
   1381 
   1382 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1383 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1384 			/* This makes no sense on a RAID 0 */
   1385 			*(int *) data = 100;
   1386 			return(0);
   1387 		}
   1388 		if (raidPtr->copyback_in_progress == 1) {
   1389 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1390 				raidPtr->Layout.numStripe;
   1391 		} else {
   1392 			*(int *) data = 100;
   1393 		}
   1394 		return (0);
   1395 
   1396 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1397 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1398 		if (raidPtr->copyback_in_progress == 1) {
   1399 			progressInfo.total = raidPtr->Layout.numStripe;
   1400 			progressInfo.completed =
   1401 				raidPtr->copyback_stripes_done;
   1402 			progressInfo.remaining = progressInfo.total -
   1403 				progressInfo.completed;
   1404 		} else {
   1405 			progressInfo.remaining = 0;
   1406 			progressInfo.completed = 100;
   1407 			progressInfo.total = 100;
   1408 		}
   1409 		retcode = copyout((caddr_t) &progressInfo,
   1410 				  (caddr_t) *progressInfoPtr,
   1411 				  sizeof(RF_ProgressInfo_t));
   1412 		return (retcode);
   1413 
   1414 		/* the sparetable daemon calls this to wait for the kernel to
   1415 		 * need a spare table. this ioctl does not return until a
   1416 		 * spare table is needed. XXX -- calling mpsleep here in the
   1417 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1418 		 * -- I should either compute the spare table in the kernel,
   1419 		 * or have a different -- XXX XXX -- interface (a different
   1420 		 * character device) for delivering the table     -- XXX */
   1421 #if 0
   1422 	case RAIDFRAME_SPARET_WAIT:
   1423 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1424 		while (!rf_sparet_wait_queue)
   1425 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1426 		waitreq = rf_sparet_wait_queue;
   1427 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1428 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1429 
   1430 		/* structure assignment */
   1431 		*((RF_SparetWait_t *) data) = *waitreq;
   1432 
   1433 		RF_Free(waitreq, sizeof(*waitreq));
   1434 		return (0);
   1435 
   1436 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1437 		 * code in it that will cause the daemon to exit */
   1438 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1439 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1440 		waitreq->fcol = -1;
   1441 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1442 		waitreq->next = rf_sparet_wait_queue;
   1443 		rf_sparet_wait_queue = waitreq;
   1444 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1445 		wakeup(&rf_sparet_wait_queue);
   1446 		return (0);
   1447 
   1448 		/* used by the spare table daemon to deliver a spare table
   1449 		 * into the kernel */
   1450 	case RAIDFRAME_SEND_SPARET:
   1451 
   1452 		/* install the spare table */
   1453 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1454 
   1455 		/* respond to the requestor.  the return status of the spare
   1456 		 * table installation is passed in the "fcol" field */
   1457 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1458 		waitreq->fcol = retcode;
   1459 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1460 		waitreq->next = rf_sparet_resp_queue;
   1461 		rf_sparet_resp_queue = waitreq;
   1462 		wakeup(&rf_sparet_resp_queue);
   1463 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1464 
   1465 		return (retcode);
   1466 #endif
   1467 
   1468 	default:
   1469 		break; /* fall through to the os-specific code below */
   1470 
   1471 	}
   1472 
   1473 	if (!raidPtr->valid)
   1474 		return (EINVAL);
   1475 
   1476 	/*
   1477 	 * Add support for "regular" device ioctls here.
   1478 	 */
   1479 
   1480 	switch (cmd) {
   1481 	case DIOCGDINFO:
   1482 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1483 		break;
   1484 #ifdef __HAVE_OLD_DISKLABEL
   1485 	case ODIOCGDINFO:
   1486 		newlabel = *(rs->sc_dkdev.dk_label);
   1487 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1488 			return ENOTTY;
   1489 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1490 		break;
   1491 #endif
   1492 
   1493 	case DIOCGPART:
   1494 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1495 		((struct partinfo *) data)->part =
   1496 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1497 		break;
   1498 
   1499 	case DIOCWDINFO:
   1500 	case DIOCSDINFO:
   1501 #ifdef __HAVE_OLD_DISKLABEL
   1502 	case ODIOCWDINFO:
   1503 	case ODIOCSDINFO:
   1504 #endif
   1505 	{
   1506 		struct disklabel *lp;
   1507 #ifdef __HAVE_OLD_DISKLABEL
   1508 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1509 			memset(&newlabel, 0, sizeof newlabel);
   1510 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1511 			lp = &newlabel;
   1512 		} else
   1513 #endif
   1514 		lp = (struct disklabel *)data;
   1515 
   1516 		if ((error = raidlock(rs)) != 0)
   1517 			return (error);
   1518 
   1519 		rs->sc_flags |= RAIDF_LABELLING;
   1520 
   1521 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1522 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1523 		if (error == 0) {
   1524 			if (cmd == DIOCWDINFO
   1525 #ifdef __HAVE_OLD_DISKLABEL
   1526 			    || cmd == ODIOCWDINFO
   1527 #endif
   1528 			   )
   1529 				error = writedisklabel(RAIDLABELDEV(dev),
   1530 				    raidstrategy, rs->sc_dkdev.dk_label,
   1531 				    rs->sc_dkdev.dk_cpulabel);
   1532 		}
   1533 		rs->sc_flags &= ~RAIDF_LABELLING;
   1534 
   1535 		raidunlock(rs);
   1536 
   1537 		if (error)
   1538 			return (error);
   1539 		break;
   1540 	}
   1541 
   1542 	case DIOCWLABEL:
   1543 		if (*(int *) data != 0)
   1544 			rs->sc_flags |= RAIDF_WLABEL;
   1545 		else
   1546 			rs->sc_flags &= ~RAIDF_WLABEL;
   1547 		break;
   1548 
   1549 	case DIOCGDEFLABEL:
   1550 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1551 		break;
   1552 
   1553 #ifdef __HAVE_OLD_DISKLABEL
   1554 	case ODIOCGDEFLABEL:
   1555 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1556 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1557 			return ENOTTY;
   1558 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1559 		break;
   1560 #endif
   1561 
   1562 	default:
   1563 		retcode = ENOTTY;
   1564 	}
   1565 	return (retcode);
   1566 
   1567 }
   1568 
   1569 
   1570 /* raidinit -- complete the rest of the initialization for the
   1571    RAIDframe device.  */
   1572 
   1573 
   1574 static void
   1575 raidinit(raidPtr)
   1576 	RF_Raid_t *raidPtr;
   1577 {
   1578 	struct raid_softc *rs;
   1579 	int     unit;
   1580 
   1581 	unit = raidPtr->raidid;
   1582 
   1583 	rs = &raid_softc[unit];
   1584 
   1585 	/* XXX should check return code first... */
   1586 	rs->sc_flags |= RAIDF_INITED;
   1587 
   1588 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1589 
   1590 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1591 
   1592 	/* disk_attach actually creates space for the CPU disklabel, among
   1593 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1594 	 * with disklabels. */
   1595 
   1596 	disk_attach(&rs->sc_dkdev);
   1597 
   1598 	/* XXX There may be a weird interaction here between this, and
   1599 	 * protectedSectors, as used in RAIDframe.  */
   1600 
   1601 	rs->sc_size = raidPtr->totalSectors;
   1602 
   1603 }
   1604 
   1605 /* wake up the daemon & tell it to get us a spare table
   1606  * XXX
   1607  * the entries in the queues should be tagged with the raidPtr
   1608  * so that in the extremely rare case that two recons happen at once,
   1609  * we know for which device were requesting a spare table
   1610  * XXX
   1611  *
   1612  * XXX This code is not currently used. GO
   1613  */
   1614 int
   1615 rf_GetSpareTableFromDaemon(req)
   1616 	RF_SparetWait_t *req;
   1617 {
   1618 	int     retcode;
   1619 
   1620 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1621 	req->next = rf_sparet_wait_queue;
   1622 	rf_sparet_wait_queue = req;
   1623 	wakeup(&rf_sparet_wait_queue);
   1624 
   1625 	/* mpsleep unlocks the mutex */
   1626 	while (!rf_sparet_resp_queue) {
   1627 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1628 		    "raidframe getsparetable", 0);
   1629 	}
   1630 	req = rf_sparet_resp_queue;
   1631 	rf_sparet_resp_queue = req->next;
   1632 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1633 
   1634 	retcode = req->fcol;
   1635 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1636 					 * alloc'd */
   1637 	return (retcode);
   1638 }
   1639 
   1640 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1641  * bp & passes it down.
   1642  * any calls originating in the kernel must use non-blocking I/O
   1643  * do some extra sanity checking to return "appropriate" error values for
   1644  * certain conditions (to make some standard utilities work)
   1645  *
   1646  * Formerly known as: rf_DoAccessKernel
   1647  */
   1648 void
   1649 raidstart(raidPtr)
   1650 	RF_Raid_t *raidPtr;
   1651 {
   1652 	RF_SectorCount_t num_blocks, pb, sum;
   1653 	RF_RaidAddr_t raid_addr;
   1654 	int     retcode;
   1655 	struct partition *pp;
   1656 	daddr_t blocknum;
   1657 	int     unit;
   1658 	struct raid_softc *rs;
   1659 	int     do_async;
   1660 	struct buf *bp;
   1661 
   1662 	unit = raidPtr->raidid;
   1663 	rs = &raid_softc[unit];
   1664 
   1665 	/* quick check to see if anything has died recently */
   1666 	RF_LOCK_MUTEX(raidPtr->mutex);
   1667 	if (raidPtr->numNewFailures > 0) {
   1668 		rf_update_component_labels(raidPtr,
   1669 					   RF_NORMAL_COMPONENT_UPDATE);
   1670 		raidPtr->numNewFailures--;
   1671 	}
   1672 
   1673 	/* Check to see if we're at the limit... */
   1674 	while (raidPtr->openings > 0) {
   1675 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1676 
   1677 		/* get the next item, if any, from the queue */
   1678 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1679 			/* nothing more to do */
   1680 			return;
   1681 		}
   1682 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1683 
   1684 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1685 		 * partition.. Need to make it absolute to the underlying
   1686 		 * device.. */
   1687 
   1688 		blocknum = bp->b_blkno;
   1689 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1690 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1691 			blocknum += pp->p_offset;
   1692 		}
   1693 
   1694 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1695 			    (int) blocknum));
   1696 
   1697 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1698 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1699 
   1700 		/* *THIS* is where we adjust what block we're going to...
   1701 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1702 		raid_addr = blocknum;
   1703 
   1704 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1705 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1706 		sum = raid_addr + num_blocks + pb;
   1707 		if (1 || rf_debugKernelAccess) {
   1708 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1709 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1710 				    (int) pb, (int) bp->b_resid));
   1711 		}
   1712 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1713 		    || (sum < num_blocks) || (sum < pb)) {
   1714 			bp->b_error = ENOSPC;
   1715 			bp->b_flags |= B_ERROR;
   1716 			bp->b_resid = bp->b_bcount;
   1717 			biodone(bp);
   1718 			RF_LOCK_MUTEX(raidPtr->mutex);
   1719 			continue;
   1720 		}
   1721 		/*
   1722 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1723 		 */
   1724 
   1725 		if (bp->b_bcount & raidPtr->sectorMask) {
   1726 			bp->b_error = EINVAL;
   1727 			bp->b_flags |= B_ERROR;
   1728 			bp->b_resid = bp->b_bcount;
   1729 			biodone(bp);
   1730 			RF_LOCK_MUTEX(raidPtr->mutex);
   1731 			continue;
   1732 
   1733 		}
   1734 		db1_printf(("Calling DoAccess..\n"));
   1735 
   1736 
   1737 		RF_LOCK_MUTEX(raidPtr->mutex);
   1738 		raidPtr->openings--;
   1739 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1740 
   1741 		/*
   1742 		 * Everything is async.
   1743 		 */
   1744 		do_async = 1;
   1745 
   1746 		disk_busy(&rs->sc_dkdev);
   1747 
   1748 		/* XXX we're still at splbio() here... do we *really*
   1749 		   need to be? */
   1750 
   1751 		/* don't ever condition on bp->b_flags & B_WRITE.
   1752 		 * always condition on B_READ instead */
   1753 
   1754 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1755 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1756 				      do_async, raid_addr, num_blocks,
   1757 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1758 
   1759 		RF_LOCK_MUTEX(raidPtr->mutex);
   1760 	}
   1761 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1762 }
   1763 
   1764 
   1765 
   1766 
   1767 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1768 
   1769 int
   1770 rf_DispatchKernelIO(queue, req)
   1771 	RF_DiskQueue_t *queue;
   1772 	RF_DiskQueueData_t *req;
   1773 {
   1774 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1775 	struct buf *bp;
   1776 	struct raidbuf *raidbp = NULL;
   1777 	struct raid_softc *rs;
   1778 	int     unit;
   1779 	int s;
   1780 
   1781 	s=0;
   1782 	/* s = splbio();*/ /* want to test this */
   1783 	/* XXX along with the vnode, we also need the softc associated with
   1784 	 * this device.. */
   1785 
   1786 	req->queue = queue;
   1787 
   1788 	unit = queue->raidPtr->raidid;
   1789 
   1790 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1791 
   1792 	if (unit >= numraid) {
   1793 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1794 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1795 	}
   1796 	rs = &raid_softc[unit];
   1797 
   1798 	bp = req->bp;
   1799 #if 1
   1800 	/* XXX when there is a physical disk failure, someone is passing us a
   1801 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1802 	 * without taking a performance hit... (not sure where the real bug
   1803 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1804 
   1805 	if (bp->b_flags & B_ERROR) {
   1806 		bp->b_flags &= ~B_ERROR;
   1807 	}
   1808 	if (bp->b_error != 0) {
   1809 		bp->b_error = 0;
   1810 	}
   1811 #endif
   1812 	raidbp = RAIDGETBUF(rs);
   1813 
   1814 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1815 
   1816 	/*
   1817 	 * context for raidiodone
   1818 	 */
   1819 	raidbp->rf_obp = bp;
   1820 	raidbp->req = req;
   1821 
   1822 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1823 
   1824 	switch (req->type) {
   1825 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1826 		/* XXX need to do something extra here.. */
   1827 		/* I'm leaving this in, as I've never actually seen it used,
   1828 		 * and I'd like folks to report it... GO */
   1829 		printf(("WAKEUP CALLED\n"));
   1830 		queue->numOutstanding++;
   1831 
   1832 		/* XXX need to glue the original buffer into this??  */
   1833 
   1834 		KernelWakeupFunc(&raidbp->rf_buf);
   1835 		break;
   1836 
   1837 	case RF_IO_TYPE_READ:
   1838 	case RF_IO_TYPE_WRITE:
   1839 
   1840 		if (req->tracerec) {
   1841 			RF_ETIMER_START(req->tracerec->timer);
   1842 		}
   1843 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1844 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1845 		    req->sectorOffset, req->numSector,
   1846 		    req->buf, KernelWakeupFunc, (void *) req,
   1847 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1848 
   1849 		if (rf_debugKernelAccess) {
   1850 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1851 				(long) bp->b_blkno));
   1852 		}
   1853 		queue->numOutstanding++;
   1854 		queue->last_deq_sector = req->sectorOffset;
   1855 		/* acc wouldn't have been let in if there were any pending
   1856 		 * reqs at any other priority */
   1857 		queue->curPriority = req->priority;
   1858 
   1859 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1860 			req->type, unit, queue->row, queue->col));
   1861 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1862 			(int) req->sectorOffset, (int) req->numSector,
   1863 			(int) (req->numSector <<
   1864 			    queue->raidPtr->logBytesPerSector),
   1865 			(int) queue->raidPtr->logBytesPerSector));
   1866 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1867 			raidbp->rf_buf.b_vp->v_numoutput++;
   1868 		}
   1869 		VOP_STRATEGY(&raidbp->rf_buf);
   1870 
   1871 		break;
   1872 
   1873 	default:
   1874 		panic("bad req->type in rf_DispatchKernelIO");
   1875 	}
   1876 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1877 	/* splx(s); */ /* want to test this */
   1878 	return (0);
   1879 }
   1880 /* this is the callback function associated with a I/O invoked from
   1881    kernel code.
   1882  */
   1883 static void
   1884 KernelWakeupFunc(vbp)
   1885 	struct buf *vbp;
   1886 {
   1887 	RF_DiskQueueData_t *req = NULL;
   1888 	RF_DiskQueue_t *queue;
   1889 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1890 	struct buf *bp;
   1891 	struct raid_softc *rs;
   1892 	int     unit;
   1893 	int s;
   1894 
   1895 	s = splbio();
   1896 	db1_printf(("recovering the request queue:\n"));
   1897 	req = raidbp->req;
   1898 
   1899 	bp = raidbp->rf_obp;
   1900 
   1901 	queue = (RF_DiskQueue_t *) req->queue;
   1902 
   1903 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1904 		bp->b_flags |= B_ERROR;
   1905 		bp->b_error = raidbp->rf_buf.b_error ?
   1906 		    raidbp->rf_buf.b_error : EIO;
   1907 	}
   1908 
   1909 	/* XXX methinks this could be wrong... */
   1910 #if 1
   1911 	bp->b_resid = raidbp->rf_buf.b_resid;
   1912 #endif
   1913 
   1914 	if (req->tracerec) {
   1915 		RF_ETIMER_STOP(req->tracerec->timer);
   1916 		RF_ETIMER_EVAL(req->tracerec->timer);
   1917 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1918 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1919 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1920 		req->tracerec->num_phys_ios++;
   1921 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1922 	}
   1923 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1924 
   1925 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1926 
   1927 
   1928 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1929 	 * ballistic, and mark the component as hosed... */
   1930 
   1931 	if (bp->b_flags & B_ERROR) {
   1932 		/* Mark the disk as dead */
   1933 		/* but only mark it once... */
   1934 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1935 		    rf_ds_optimal) {
   1936 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1937 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1938 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1939 			    rf_ds_failed;
   1940 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1941 			queue->raidPtr->numFailures++;
   1942 			queue->raidPtr->numNewFailures++;
   1943 		} else {	/* Disk is already dead... */
   1944 			/* printf("Disk already marked as dead!\n"); */
   1945 		}
   1946 
   1947 	}
   1948 
   1949 	rs = &raid_softc[unit];
   1950 	RAIDPUTBUF(rs, raidbp);
   1951 
   1952 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1953 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1954 
   1955 	splx(s);
   1956 }
   1957 
   1958 
   1959 
   1960 /*
   1961  * initialize a buf structure for doing an I/O in the kernel.
   1962  */
   1963 static void
   1964 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1965        logBytesPerSector, b_proc)
   1966 	struct buf *bp;
   1967 	struct vnode *b_vp;
   1968 	unsigned rw_flag;
   1969 	dev_t dev;
   1970 	RF_SectorNum_t startSect;
   1971 	RF_SectorCount_t numSect;
   1972 	caddr_t buf;
   1973 	void (*cbFunc) (struct buf *);
   1974 	void *cbArg;
   1975 	int logBytesPerSector;
   1976 	struct proc *b_proc;
   1977 {
   1978 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1979 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1980 	bp->b_bcount = numSect << logBytesPerSector;
   1981 	bp->b_bufsize = bp->b_bcount;
   1982 	bp->b_error = 0;
   1983 	bp->b_dev = dev;
   1984 	bp->b_data = buf;
   1985 	bp->b_blkno = startSect;
   1986 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1987 	if (bp->b_bcount == 0) {
   1988 		panic("bp->b_bcount is zero in InitBP!!\n");
   1989 	}
   1990 	bp->b_proc = b_proc;
   1991 	bp->b_iodone = cbFunc;
   1992 	bp->b_vp = b_vp;
   1993 
   1994 }
   1995 
   1996 static void
   1997 raidgetdefaultlabel(raidPtr, rs, lp)
   1998 	RF_Raid_t *raidPtr;
   1999 	struct raid_softc *rs;
   2000 	struct disklabel *lp;
   2001 {
   2002 	db1_printf(("Building a default label...\n"));
   2003 	memset(lp, 0, sizeof(*lp));
   2004 
   2005 	/* fabricate a label... */
   2006 	lp->d_secperunit = raidPtr->totalSectors;
   2007 	lp->d_secsize = raidPtr->bytesPerSector;
   2008 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2009 	lp->d_ntracks = 4 * raidPtr->numCol;
   2010 	lp->d_ncylinders = raidPtr->totalSectors /
   2011 		(lp->d_nsectors * lp->d_ntracks);
   2012 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2013 
   2014 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2015 	lp->d_type = DTYPE_RAID;
   2016 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2017 	lp->d_rpm = 3600;
   2018 	lp->d_interleave = 1;
   2019 	lp->d_flags = 0;
   2020 
   2021 	lp->d_partitions[RAW_PART].p_offset = 0;
   2022 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2023 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2024 	lp->d_npartitions = RAW_PART + 1;
   2025 
   2026 	lp->d_magic = DISKMAGIC;
   2027 	lp->d_magic2 = DISKMAGIC;
   2028 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2029 
   2030 }
   2031 /*
   2032  * Read the disklabel from the raid device.  If one is not present, fake one
   2033  * up.
   2034  */
   2035 static void
   2036 raidgetdisklabel(dev)
   2037 	dev_t   dev;
   2038 {
   2039 	int     unit = raidunit(dev);
   2040 	struct raid_softc *rs = &raid_softc[unit];
   2041 	char   *errstring;
   2042 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2043 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2044 	RF_Raid_t *raidPtr;
   2045 
   2046 	db1_printf(("Getting the disklabel...\n"));
   2047 
   2048 	memset(clp, 0, sizeof(*clp));
   2049 
   2050 	raidPtr = raidPtrs[unit];
   2051 
   2052 	raidgetdefaultlabel(raidPtr, rs, lp);
   2053 
   2054 	/*
   2055 	 * Call the generic disklabel extraction routine.
   2056 	 */
   2057 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2058 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2059 	if (errstring)
   2060 		raidmakedisklabel(rs);
   2061 	else {
   2062 		int     i;
   2063 		struct partition *pp;
   2064 
   2065 		/*
   2066 		 * Sanity check whether the found disklabel is valid.
   2067 		 *
   2068 		 * This is necessary since total size of the raid device
   2069 		 * may vary when an interleave is changed even though exactly
   2070 		 * same componets are used, and old disklabel may used
   2071 		 * if that is found.
   2072 		 */
   2073 		if (lp->d_secperunit != rs->sc_size)
   2074 			printf("raid%d: WARNING: %s: "
   2075 			    "total sector size in disklabel (%d) != "
   2076 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2077 			    lp->d_secperunit, (long) rs->sc_size);
   2078 		for (i = 0; i < lp->d_npartitions; i++) {
   2079 			pp = &lp->d_partitions[i];
   2080 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2081 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2082 				       "exceeds the size of raid (%ld)\n",
   2083 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2084 		}
   2085 	}
   2086 
   2087 }
   2088 /*
   2089  * Take care of things one might want to take care of in the event
   2090  * that a disklabel isn't present.
   2091  */
   2092 static void
   2093 raidmakedisklabel(rs)
   2094 	struct raid_softc *rs;
   2095 {
   2096 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2097 	db1_printf(("Making a label..\n"));
   2098 
   2099 	/*
   2100 	 * For historical reasons, if there's no disklabel present
   2101 	 * the raw partition must be marked FS_BSDFFS.
   2102 	 */
   2103 
   2104 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2105 
   2106 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2107 
   2108 	lp->d_checksum = dkcksum(lp);
   2109 }
   2110 /*
   2111  * Lookup the provided name in the filesystem.  If the file exists,
   2112  * is a valid block device, and isn't being used by anyone else,
   2113  * set *vpp to the file's vnode.
   2114  * You'll find the original of this in ccd.c
   2115  */
   2116 int
   2117 raidlookup(path, p, vpp)
   2118 	char   *path;
   2119 	struct proc *p;
   2120 	struct vnode **vpp;	/* result */
   2121 {
   2122 	struct nameidata nd;
   2123 	struct vnode *vp;
   2124 	struct vattr va;
   2125 	int     error;
   2126 
   2127 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2128 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2129 #if 0
   2130 		printf("RAIDframe: vn_open returned %d\n", error);
   2131 #endif
   2132 		return (error);
   2133 	}
   2134 	vp = nd.ni_vp;
   2135 	if (vp->v_usecount > 1) {
   2136 		VOP_UNLOCK(vp, 0);
   2137 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2138 		return (EBUSY);
   2139 	}
   2140 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2141 		VOP_UNLOCK(vp, 0);
   2142 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2143 		return (error);
   2144 	}
   2145 	/* XXX: eventually we should handle VREG, too. */
   2146 	if (va.va_type != VBLK) {
   2147 		VOP_UNLOCK(vp, 0);
   2148 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2149 		return (ENOTBLK);
   2150 	}
   2151 	VOP_UNLOCK(vp, 0);
   2152 	*vpp = vp;
   2153 	return (0);
   2154 }
   2155 /*
   2156  * Wait interruptibly for an exclusive lock.
   2157  *
   2158  * XXX
   2159  * Several drivers do this; it should be abstracted and made MP-safe.
   2160  * (Hmm... where have we seen this warning before :->  GO )
   2161  */
   2162 static int
   2163 raidlock(rs)
   2164 	struct raid_softc *rs;
   2165 {
   2166 	int     error;
   2167 
   2168 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2169 		rs->sc_flags |= RAIDF_WANTED;
   2170 		if ((error =
   2171 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2172 			return (error);
   2173 	}
   2174 	rs->sc_flags |= RAIDF_LOCKED;
   2175 	return (0);
   2176 }
   2177 /*
   2178  * Unlock and wake up any waiters.
   2179  */
   2180 static void
   2181 raidunlock(rs)
   2182 	struct raid_softc *rs;
   2183 {
   2184 
   2185 	rs->sc_flags &= ~RAIDF_LOCKED;
   2186 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2187 		rs->sc_flags &= ~RAIDF_WANTED;
   2188 		wakeup(rs);
   2189 	}
   2190 }
   2191 
   2192 
   2193 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2194 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2195 
   2196 int
   2197 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2198 {
   2199 	RF_ComponentLabel_t clabel;
   2200 	raidread_component_label(dev, b_vp, &clabel);
   2201 	clabel.mod_counter = mod_counter;
   2202 	clabel.clean = RF_RAID_CLEAN;
   2203 	raidwrite_component_label(dev, b_vp, &clabel);
   2204 	return(0);
   2205 }
   2206 
   2207 
   2208 int
   2209 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2210 {
   2211 	RF_ComponentLabel_t clabel;
   2212 	raidread_component_label(dev, b_vp, &clabel);
   2213 	clabel.mod_counter = mod_counter;
   2214 	clabel.clean = RF_RAID_DIRTY;
   2215 	raidwrite_component_label(dev, b_vp, &clabel);
   2216 	return(0);
   2217 }
   2218 
   2219 /* ARGSUSED */
   2220 int
   2221 raidread_component_label(dev, b_vp, clabel)
   2222 	dev_t dev;
   2223 	struct vnode *b_vp;
   2224 	RF_ComponentLabel_t *clabel;
   2225 {
   2226 	struct buf *bp;
   2227 	const struct bdevsw *bdev;
   2228 	int error;
   2229 
   2230 	/* XXX should probably ensure that we don't try to do this if
   2231 	   someone has changed rf_protected_sectors. */
   2232 
   2233 	if (b_vp == NULL) {
   2234 		/* For whatever reason, this component is not valid.
   2235 		   Don't try to read a component label from it. */
   2236 		return(EINVAL);
   2237 	}
   2238 
   2239 	/* get a block of the appropriate size... */
   2240 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2241 	bp->b_dev = dev;
   2242 
   2243 	/* get our ducks in a row for the read */
   2244 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2245 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2246 	bp->b_flags |= B_READ;
   2247  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2248 
   2249 	bdev = bdevsw_lookup(bp->b_dev);
   2250 	if (bdev == NULL)
   2251 		return (ENXIO);
   2252 	(*bdev->d_strategy)(bp);
   2253 
   2254 	error = biowait(bp);
   2255 
   2256 	if (!error) {
   2257 		memcpy(clabel, bp->b_data,
   2258 		       sizeof(RF_ComponentLabel_t));
   2259 #if 0
   2260 		rf_print_component_label( clabel );
   2261 #endif
   2262         } else {
   2263 #if 0
   2264 		printf("Failed to read RAID component label!\n");
   2265 #endif
   2266 	}
   2267 
   2268 	brelse(bp);
   2269 	return(error);
   2270 }
   2271 /* ARGSUSED */
   2272 int
   2273 raidwrite_component_label(dev, b_vp, clabel)
   2274 	dev_t dev;
   2275 	struct vnode *b_vp;
   2276 	RF_ComponentLabel_t *clabel;
   2277 {
   2278 	struct buf *bp;
   2279 	const struct bdevsw *bdev;
   2280 	int error;
   2281 
   2282 	/* get a block of the appropriate size... */
   2283 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2284 	bp->b_dev = dev;
   2285 
   2286 	/* get our ducks in a row for the write */
   2287 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2288 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2289 	bp->b_flags |= B_WRITE;
   2290  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2291 
   2292 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2293 
   2294 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2295 
   2296 	bdev = bdevsw_lookup(bp->b_dev);
   2297 	if (bdev == NULL)
   2298 		return (ENXIO);
   2299 	(*bdev->d_strategy)(bp);
   2300 	error = biowait(bp);
   2301 	brelse(bp);
   2302 	if (error) {
   2303 #if 1
   2304 		printf("Failed to write RAID component info!\n");
   2305 #endif
   2306 	}
   2307 
   2308 	return(error);
   2309 }
   2310 
   2311 void
   2312 rf_markalldirty(raidPtr)
   2313 	RF_Raid_t *raidPtr;
   2314 {
   2315 	RF_ComponentLabel_t clabel;
   2316 	int r,c;
   2317 
   2318 	raidPtr->mod_counter++;
   2319 	for (r = 0; r < raidPtr->numRow; r++) {
   2320 		for (c = 0; c < raidPtr->numCol; c++) {
   2321 			/* we don't want to touch (at all) a disk that has
   2322 			   failed */
   2323 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2324 				raidread_component_label(
   2325 					raidPtr->Disks[r][c].dev,
   2326 					raidPtr->raid_cinfo[r][c].ci_vp,
   2327 					&clabel);
   2328 				if (clabel.status == rf_ds_spared) {
   2329 					/* XXX do something special...
   2330 					 but whatever you do, don't
   2331 					 try to access it!! */
   2332 				} else {
   2333 #if 0
   2334 				clabel.status =
   2335 					raidPtr->Disks[r][c].status;
   2336 				raidwrite_component_label(
   2337 					raidPtr->Disks[r][c].dev,
   2338 					raidPtr->raid_cinfo[r][c].ci_vp,
   2339 					&clabel);
   2340 #endif
   2341 				raidmarkdirty(
   2342 				       raidPtr->Disks[r][c].dev,
   2343 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2344 				       raidPtr->mod_counter);
   2345 				}
   2346 			}
   2347 		}
   2348 	}
   2349 	/* printf("Component labels marked dirty.\n"); */
   2350 #if 0
   2351 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2352 		sparecol = raidPtr->numCol + c;
   2353 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2354 			/*
   2355 
   2356 			   XXX this is where we get fancy and map this spare
   2357 			   into it's correct spot in the array.
   2358 
   2359 			 */
   2360 			/*
   2361 
   2362 			   we claim this disk is "optimal" if it's
   2363 			   rf_ds_used_spare, as that means it should be
   2364 			   directly substitutable for the disk it replaced.
   2365 			   We note that too...
   2366 
   2367 			 */
   2368 
   2369 			for(i=0;i<raidPtr->numRow;i++) {
   2370 				for(j=0;j<raidPtr->numCol;j++) {
   2371 					if ((raidPtr->Disks[i][j].spareRow ==
   2372 					     r) &&
   2373 					    (raidPtr->Disks[i][j].spareCol ==
   2374 					     sparecol)) {
   2375 						srow = r;
   2376 						scol = sparecol;
   2377 						break;
   2378 					}
   2379 				}
   2380 			}
   2381 
   2382 			raidread_component_label(
   2383 				      raidPtr->Disks[r][sparecol].dev,
   2384 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2385 				      &clabel);
   2386 			/* make sure status is noted */
   2387 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2388 			clabel.mod_counter = raidPtr->mod_counter;
   2389 			clabel.serial_number = raidPtr->serial_number;
   2390 			clabel.row = srow;
   2391 			clabel.column = scol;
   2392 			clabel.num_rows = raidPtr->numRow;
   2393 			clabel.num_columns = raidPtr->numCol;
   2394 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2395 			clabel.status = rf_ds_optimal;
   2396 			raidwrite_component_label(
   2397 				      raidPtr->Disks[r][sparecol].dev,
   2398 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2399 				      &clabel);
   2400 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2401 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2402 		}
   2403 	}
   2404 
   2405 #endif
   2406 }
   2407 
   2408 
   2409 void
   2410 rf_update_component_labels(raidPtr, final)
   2411 	RF_Raid_t *raidPtr;
   2412 	int final;
   2413 {
   2414 	RF_ComponentLabel_t clabel;
   2415 	int sparecol;
   2416 	int r,c;
   2417 	int i,j;
   2418 	int srow, scol;
   2419 
   2420 	srow = -1;
   2421 	scol = -1;
   2422 
   2423 	/* XXX should do extra checks to make sure things really are clean,
   2424 	   rather than blindly setting the clean bit... */
   2425 
   2426 	raidPtr->mod_counter++;
   2427 
   2428 	for (r = 0; r < raidPtr->numRow; r++) {
   2429 		for (c = 0; c < raidPtr->numCol; c++) {
   2430 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2431 				raidread_component_label(
   2432 					raidPtr->Disks[r][c].dev,
   2433 					raidPtr->raid_cinfo[r][c].ci_vp,
   2434 					&clabel);
   2435 				/* make sure status is noted */
   2436 				clabel.status = rf_ds_optimal;
   2437 				/* bump the counter */
   2438 				clabel.mod_counter = raidPtr->mod_counter;
   2439 
   2440 				raidwrite_component_label(
   2441 					raidPtr->Disks[r][c].dev,
   2442 					raidPtr->raid_cinfo[r][c].ci_vp,
   2443 					&clabel);
   2444 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2445 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2446 						raidmarkclean(
   2447 							      raidPtr->Disks[r][c].dev,
   2448 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2449 							      raidPtr->mod_counter);
   2450 					}
   2451 				}
   2452 			}
   2453 			/* else we don't touch it.. */
   2454 		}
   2455 	}
   2456 
   2457 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2458 		sparecol = raidPtr->numCol + c;
   2459 		/* Need to ensure that the reconstruct actually completed! */
   2460 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2461 			/*
   2462 
   2463 			   we claim this disk is "optimal" if it's
   2464 			   rf_ds_used_spare, as that means it should be
   2465 			   directly substitutable for the disk it replaced.
   2466 			   We note that too...
   2467 
   2468 			 */
   2469 
   2470 			for(i=0;i<raidPtr->numRow;i++) {
   2471 				for(j=0;j<raidPtr->numCol;j++) {
   2472 					if ((raidPtr->Disks[i][j].spareRow ==
   2473 					     0) &&
   2474 					    (raidPtr->Disks[i][j].spareCol ==
   2475 					     sparecol)) {
   2476 						srow = i;
   2477 						scol = j;
   2478 						break;
   2479 					}
   2480 				}
   2481 			}
   2482 
   2483 			/* XXX shouldn't *really* need this... */
   2484 			raidread_component_label(
   2485 				      raidPtr->Disks[0][sparecol].dev,
   2486 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2487 				      &clabel);
   2488 			/* make sure status is noted */
   2489 
   2490 			raid_init_component_label(raidPtr, &clabel);
   2491 
   2492 			clabel.mod_counter = raidPtr->mod_counter;
   2493 			clabel.row = srow;
   2494 			clabel.column = scol;
   2495 			clabel.status = rf_ds_optimal;
   2496 
   2497 			raidwrite_component_label(
   2498 				      raidPtr->Disks[0][sparecol].dev,
   2499 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2500 				      &clabel);
   2501 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2502 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2503 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2504 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2505 						       raidPtr->mod_counter);
   2506 				}
   2507 			}
   2508 		}
   2509 	}
   2510 	/* 	printf("Component labels updated\n"); */
   2511 }
   2512 
   2513 void
   2514 rf_close_component(raidPtr, vp, auto_configured)
   2515 	RF_Raid_t *raidPtr;
   2516 	struct vnode *vp;
   2517 	int auto_configured;
   2518 {
   2519 	struct proc *p;
   2520 
   2521 	p = raidPtr->engine_thread;
   2522 
   2523 	if (vp != NULL) {
   2524 		if (auto_configured == 1) {
   2525 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2526 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2527 			vput(vp);
   2528 
   2529 		} else {
   2530 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2531 		}
   2532 	} else {
   2533 #if 0
   2534 		printf("vnode was NULL\n");
   2535 #endif
   2536 	}
   2537 }
   2538 
   2539 
   2540 void
   2541 rf_UnconfigureVnodes(raidPtr)
   2542 	RF_Raid_t *raidPtr;
   2543 {
   2544 	int r,c;
   2545 	struct proc *p;
   2546 	struct vnode *vp;
   2547 	int acd;
   2548 
   2549 
   2550 	/* We take this opportunity to close the vnodes like we should.. */
   2551 
   2552 	p = raidPtr->engine_thread;
   2553 
   2554 	for (r = 0; r < raidPtr->numRow; r++) {
   2555 		for (c = 0; c < raidPtr->numCol; c++) {
   2556 #if 0
   2557 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2558 			       raidPtr->raidid, r, c);
   2559 #endif
   2560 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2561 			acd = raidPtr->Disks[r][c].auto_configured;
   2562 			rf_close_component(raidPtr, vp, acd);
   2563 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2564 			raidPtr->Disks[r][c].auto_configured = 0;
   2565 		}
   2566 	}
   2567 	for (r = 0; r < raidPtr->numSpare; r++) {
   2568 #if 0
   2569 		printf("raid%d: Closing vnode for spare: %d\n",
   2570 		       raidPtr->raidid, r);
   2571 #endif
   2572 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2573 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2574 		rf_close_component(raidPtr, vp, acd);
   2575 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2576 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2577 	}
   2578 }
   2579 
   2580 
   2581 void
   2582 rf_ReconThread(req)
   2583 	struct rf_recon_req *req;
   2584 {
   2585 	int     s;
   2586 	RF_Raid_t *raidPtr;
   2587 
   2588 	s = splbio();
   2589 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2590 	raidPtr->recon_in_progress = 1;
   2591 
   2592 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2593 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2594 
   2595 	/* XXX get rid of this! we don't need it at all.. */
   2596 	RF_Free(req, sizeof(*req));
   2597 
   2598 	raidPtr->recon_in_progress = 0;
   2599 	splx(s);
   2600 
   2601 	/* That's all... */
   2602 	kthread_exit(0);        /* does not return */
   2603 }
   2604 
   2605 void
   2606 rf_RewriteParityThread(raidPtr)
   2607 	RF_Raid_t *raidPtr;
   2608 {
   2609 	int retcode;
   2610 	int s;
   2611 
   2612 	raidPtr->parity_rewrite_in_progress = 1;
   2613 	s = splbio();
   2614 	retcode = rf_RewriteParity(raidPtr);
   2615 	splx(s);
   2616 	if (retcode) {
   2617 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2618 	} else {
   2619 		/* set the clean bit!  If we shutdown correctly,
   2620 		   the clean bit on each component label will get
   2621 		   set */
   2622 		raidPtr->parity_good = RF_RAID_CLEAN;
   2623 	}
   2624 	raidPtr->parity_rewrite_in_progress = 0;
   2625 
   2626 	/* Anyone waiting for us to stop?  If so, inform them... */
   2627 	if (raidPtr->waitShutdown) {
   2628 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2629 	}
   2630 
   2631 	/* That's all... */
   2632 	kthread_exit(0);        /* does not return */
   2633 }
   2634 
   2635 
   2636 void
   2637 rf_CopybackThread(raidPtr)
   2638 	RF_Raid_t *raidPtr;
   2639 {
   2640 	int s;
   2641 
   2642 	raidPtr->copyback_in_progress = 1;
   2643 	s = splbio();
   2644 	rf_CopybackReconstructedData(raidPtr);
   2645 	splx(s);
   2646 	raidPtr->copyback_in_progress = 0;
   2647 
   2648 	/* That's all... */
   2649 	kthread_exit(0);        /* does not return */
   2650 }
   2651 
   2652 
   2653 void
   2654 rf_ReconstructInPlaceThread(req)
   2655 	struct rf_recon_req *req;
   2656 {
   2657 	int retcode;
   2658 	int s;
   2659 	RF_Raid_t *raidPtr;
   2660 
   2661 	s = splbio();
   2662 	raidPtr = req->raidPtr;
   2663 	raidPtr->recon_in_progress = 1;
   2664 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2665 	RF_Free(req, sizeof(*req));
   2666 	raidPtr->recon_in_progress = 0;
   2667 	splx(s);
   2668 
   2669 	/* That's all... */
   2670 	kthread_exit(0);        /* does not return */
   2671 }
   2672 
/*
 * Mount-root hook for RAID devices.  Intentionally empty: `dev' is
 * not looked at and nothing is done.
 */
void
rf_mountroot_hook(struct device *dev)
{
	/* nothing to do */
}
   2679 
   2680 
   2681 RF_AutoConfig_t *
   2682 rf_find_raid_components()
   2683 {
   2684 	struct vnode *vp;
   2685 	struct disklabel label;
   2686 	struct device *dv;
   2687 	dev_t dev;
   2688 	int bmajor;
   2689 	int error;
   2690 	int i;
   2691 	int good_one;
   2692 	RF_ComponentLabel_t *clabel;
   2693 	RF_AutoConfig_t *ac_list;
   2694 	RF_AutoConfig_t *ac;
   2695 
   2696 
   2697 	/* initialize the AutoConfig list */
   2698 	ac_list = NULL;
   2699 
   2700 	/* we begin by trolling through *all* the devices on the system */
   2701 
   2702 	for (dv = alldevs.tqh_first; dv != NULL;
   2703 	     dv = dv->dv_list.tqe_next) {
   2704 
   2705 		/* we are only interested in disks... */
   2706 		if (dv->dv_class != DV_DISK)
   2707 			continue;
   2708 
   2709 		/* we don't care about floppies... */
   2710 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2711 			continue;
   2712 		}
   2713 		/* hdfd is the Atari/Hades floppy driver */
   2714 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2715 			continue;
   2716 		}
   2717 		/* fdisa is the Atari/Milan floppy driver */
   2718 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
   2719 			continue;
   2720 		}
   2721 
   2722 		/* need to find the device_name_to_block_device_major stuff */
   2723 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2724 
   2725 		/* get a vnode for the raw partition of this disk */
   2726 
   2727 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2728 		if (bdevvp(dev, &vp))
   2729 			panic("RAID can't alloc vnode");
   2730 
   2731 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2732 
   2733 		if (error) {
   2734 			/* "Who cares."  Continue looking
   2735 			   for something that exists*/
   2736 			vput(vp);
   2737 			continue;
   2738 		}
   2739 
   2740 		/* Ok, the disk exists.  Go get the disklabel. */
   2741 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2742 				  FREAD, NOCRED, 0);
   2743 		if (error) {
   2744 			/*
   2745 			 * XXX can't happen - open() would
   2746 			 * have errored out (or faked up one)
   2747 			 */
   2748 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2749 			       dv->dv_xname, 'a' + RAW_PART, error);
   2750 		}
   2751 
   2752 		/* don't need this any more.  We'll allocate it again
   2753 		   a little later if we really do... */
   2754 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2755 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2756 		vput(vp);
   2757 
   2758 		for (i=0; i < label.d_npartitions; i++) {
   2759 			/* We only support partitions marked as RAID */
   2760 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2761 				continue;
   2762 
   2763 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2764 			if (bdevvp(dev, &vp))
   2765 				panic("RAID can't alloc vnode");
   2766 
   2767 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2768 			if (error) {
   2769 				/* Whatever... */
   2770 				vput(vp);
   2771 				continue;
   2772 			}
   2773 
   2774 			good_one = 0;
   2775 
   2776 			clabel = (RF_ComponentLabel_t *)
   2777 				malloc(sizeof(RF_ComponentLabel_t),
   2778 				       M_RAIDFRAME, M_NOWAIT);
   2779 			if (clabel == NULL) {
   2780 				/* XXX CLEANUP HERE */
   2781 				printf("RAID auto config: out of memory!\n");
   2782 				return(NULL); /* XXX probably should panic? */
   2783 			}
   2784 
   2785 			if (!raidread_component_label(dev, vp, clabel)) {
   2786 				/* Got the label.  Does it look reasonable? */
   2787 				if (rf_reasonable_label(clabel) &&
   2788 				    (clabel->partitionSize <=
   2789 				     label.d_partitions[i].p_size)) {
   2790 #if DEBUG
   2791 					printf("Component on: %s%c: %d\n",
   2792 					       dv->dv_xname, 'a'+i,
   2793 					       label.d_partitions[i].p_size);
   2794 					rf_print_component_label(clabel);
   2795 #endif
   2796 					/* if it's reasonable, add it,
   2797 					   else ignore it. */
   2798 					ac = (RF_AutoConfig_t *)
   2799 						malloc(sizeof(RF_AutoConfig_t),
   2800 						       M_RAIDFRAME,
   2801 						       M_NOWAIT);
   2802 					if (ac == NULL) {
   2803 						/* XXX should panic?? */
   2804 						return(NULL);
   2805 					}
   2806 
   2807 					sprintf(ac->devname, "%s%c",
   2808 						dv->dv_xname, 'a'+i);
   2809 					ac->dev = dev;
   2810 					ac->vp = vp;
   2811 					ac->clabel = clabel;
   2812 					ac->next = ac_list;
   2813 					ac_list = ac;
   2814 					good_one = 1;
   2815 				}
   2816 			}
   2817 			if (!good_one) {
   2818 				/* cleanup */
   2819 				free(clabel, M_RAIDFRAME);
   2820 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2821 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2822 				vput(vp);
   2823 			}
   2824 		}
   2825 	}
   2826 	return(ac_list);
   2827 }
   2828 
   2829 static int
   2830 rf_reasonable_label(clabel)
   2831 	RF_ComponentLabel_t *clabel;
   2832 {
   2833 
   2834 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2835 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2836 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2837 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2838 	    clabel->row >=0 &&
   2839 	    clabel->column >= 0 &&
   2840 	    clabel->num_rows > 0 &&
   2841 	    clabel->num_columns > 0 &&
   2842 	    clabel->row < clabel->num_rows &&
   2843 	    clabel->column < clabel->num_columns &&
   2844 	    clabel->blockSize > 0 &&
   2845 	    clabel->numBlocks > 0) {
   2846 		/* label looks reasonable enough... */
   2847 		return(1);
   2848 	}
   2849 	return(0);
   2850 }
   2851 
   2852 
   2853 void
   2854 rf_print_component_label(clabel)
   2855 	RF_ComponentLabel_t *clabel;
   2856 {
   2857 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2858 	       clabel->row, clabel->column,
   2859 	       clabel->num_rows, clabel->num_columns);
   2860 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2861 	       clabel->version, clabel->serial_number,
   2862 	       clabel->mod_counter);
   2863 	printf("   Clean: %s Status: %d\n",
   2864 	       clabel->clean ? "Yes" : "No", clabel->status );
   2865 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2866 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2867 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2868 	       (char) clabel->parityConfig, clabel->blockSize,
   2869 	       clabel->numBlocks);
   2870 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2871 	printf("   Contains root partition: %s\n",
   2872 	       clabel->root_partition ? "Yes" : "No" );
   2873 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2874 #if 0
   2875 	   printf("   Config order: %d\n", clabel->config_order);
   2876 #endif
   2877 
   2878 }
   2879 
   2880 RF_ConfigSet_t *
   2881 rf_create_auto_sets(ac_list)
   2882 	RF_AutoConfig_t *ac_list;
   2883 {
   2884 	RF_AutoConfig_t *ac;
   2885 	RF_ConfigSet_t *config_sets;
   2886 	RF_ConfigSet_t *cset;
   2887 	RF_AutoConfig_t *ac_next;
   2888 
   2889 
   2890 	config_sets = NULL;
   2891 
   2892 	/* Go through the AutoConfig list, and figure out which components
   2893 	   belong to what sets.  */
   2894 	ac = ac_list;
   2895 	while(ac!=NULL) {
   2896 		/* we're going to putz with ac->next, so save it here
   2897 		   for use at the end of the loop */
   2898 		ac_next = ac->next;
   2899 
   2900 		if (config_sets == NULL) {
   2901 			/* will need at least this one... */
   2902 			config_sets = (RF_ConfigSet_t *)
   2903 				malloc(sizeof(RF_ConfigSet_t),
   2904 				       M_RAIDFRAME, M_NOWAIT);
   2905 			if (config_sets == NULL) {
   2906 				panic("rf_create_auto_sets: No memory!\n");
   2907 			}
   2908 			/* this one is easy :) */
   2909 			config_sets->ac = ac;
   2910 			config_sets->next = NULL;
   2911 			config_sets->rootable = 0;
   2912 			ac->next = NULL;
   2913 		} else {
   2914 			/* which set does this component fit into? */
   2915 			cset = config_sets;
   2916 			while(cset!=NULL) {
   2917 				if (rf_does_it_fit(cset, ac)) {
   2918 					/* looks like it matches... */
   2919 					ac->next = cset->ac;
   2920 					cset->ac = ac;
   2921 					break;
   2922 				}
   2923 				cset = cset->next;
   2924 			}
   2925 			if (cset==NULL) {
   2926 				/* didn't find a match above... new set..*/
   2927 				cset = (RF_ConfigSet_t *)
   2928 					malloc(sizeof(RF_ConfigSet_t),
   2929 					       M_RAIDFRAME, M_NOWAIT);
   2930 				if (cset == NULL) {
   2931 					panic("rf_create_auto_sets: No memory!\n");
   2932 				}
   2933 				cset->ac = ac;
   2934 				ac->next = NULL;
   2935 				cset->next = config_sets;
   2936 				cset->rootable = 0;
   2937 				config_sets = cset;
   2938 			}
   2939 		}
   2940 		ac = ac_next;
   2941 	}
   2942 
   2943 
   2944 	return(config_sets);
   2945 }
   2946 
   2947 static int
   2948 rf_does_it_fit(cset, ac)
   2949 	RF_ConfigSet_t *cset;
   2950 	RF_AutoConfig_t *ac;
   2951 {
   2952 	RF_ComponentLabel_t *clabel1, *clabel2;
   2953 
   2954 	/* If this one matches the *first* one in the set, that's good
   2955 	   enough, since the other members of the set would have been
   2956 	   through here too... */
   2957 	/* note that we are not checking partitionSize here..
   2958 
   2959 	   Note that we are also not checking the mod_counters here.
   2960 	   If everything else matches execpt the mod_counter, that's
   2961 	   good enough for this test.  We will deal with the mod_counters
   2962 	   a little later in the autoconfiguration process.
   2963 
   2964 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2965 
   2966 	   The reason we don't check for this is that failed disks
   2967 	   will have lower modification counts.  If those disks are
   2968 	   not added to the set they used to belong to, then they will
   2969 	   form their own set, which may result in 2 different sets,
   2970 	   for example, competing to be configured at raid0, and
   2971 	   perhaps competing to be the root filesystem set.  If the
   2972 	   wrong ones get configured, or both attempt to become /,
   2973 	   weird behaviour and or serious lossage will occur.  Thus we
   2974 	   need to bring them into the fold here, and kick them out at
   2975 	   a later point.
   2976 
   2977 	*/
   2978 
   2979 	clabel1 = cset->ac->clabel;
   2980 	clabel2 = ac->clabel;
   2981 	if ((clabel1->version == clabel2->version) &&
   2982 	    (clabel1->serial_number == clabel2->serial_number) &&
   2983 	    (clabel1->num_rows == clabel2->num_rows) &&
   2984 	    (clabel1->num_columns == clabel2->num_columns) &&
   2985 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2986 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2987 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2988 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2989 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2990 	    (clabel1->blockSize == clabel2->blockSize) &&
   2991 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2992 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2993 	    (clabel1->root_partition == clabel2->root_partition) &&
   2994 	    (clabel1->last_unit == clabel2->last_unit) &&
   2995 	    (clabel1->config_order == clabel2->config_order)) {
   2996 		/* if it get's here, it almost *has* to be a match */
   2997 	} else {
   2998 		/* it's not consistent with somebody in the set..
   2999 		   punt */
   3000 		return(0);
   3001 	}
   3002 	/* all was fine.. it must fit... */
   3003 	return(1);
   3004 }
   3005 
   3006 int
   3007 rf_have_enough_components(cset)
   3008 	RF_ConfigSet_t *cset;
   3009 {
   3010 	RF_AutoConfig_t *ac;
   3011 	RF_AutoConfig_t *auto_config;
   3012 	RF_ComponentLabel_t *clabel;
   3013 	int r,c;
   3014 	int num_rows;
   3015 	int num_cols;
   3016 	int num_missing;
   3017 	int mod_counter;
   3018 	int mod_counter_found;
   3019 	int even_pair_failed;
   3020 	char parity_type;
   3021 
   3022 
   3023 	/* check to see that we have enough 'live' components
   3024 	   of this set.  If so, we can configure it if necessary */
   3025 
   3026 	num_rows = cset->ac->clabel->num_rows;
   3027 	num_cols = cset->ac->clabel->num_columns;
   3028 	parity_type = cset->ac->clabel->parityConfig;
   3029 
   3030 	/* XXX Check for duplicate components!?!?!? */
   3031 
   3032 	/* Determine what the mod_counter is supposed to be for this set. */
   3033 
   3034 	mod_counter_found = 0;
   3035 	mod_counter = 0;
   3036 	ac = cset->ac;
   3037 	while(ac!=NULL) {
   3038 		if (mod_counter_found==0) {
   3039 			mod_counter = ac->clabel->mod_counter;
   3040 			mod_counter_found = 1;
   3041 		} else {
   3042 			if (ac->clabel->mod_counter > mod_counter) {
   3043 				mod_counter = ac->clabel->mod_counter;
   3044 			}
   3045 		}
   3046 		ac = ac->next;
   3047 	}
   3048 
   3049 	num_missing = 0;
   3050 	auto_config = cset->ac;
   3051 
   3052 	for(r=0; r<num_rows; r++) {
   3053 		even_pair_failed = 0;
   3054 		for(c=0; c<num_cols; c++) {
   3055 			ac = auto_config;
   3056 			while(ac!=NULL) {
   3057 				if ((ac->clabel->row == r) &&
   3058 				    (ac->clabel->column == c) &&
   3059 				    (ac->clabel->mod_counter == mod_counter)) {
   3060 					/* it's this one... */
   3061 #if DEBUG
   3062 					printf("Found: %s at %d,%d\n",
   3063 					       ac->devname,r,c);
   3064 #endif
   3065 					break;
   3066 				}
   3067 				ac=ac->next;
   3068 			}
   3069 			if (ac==NULL) {
   3070 				/* Didn't find one here! */
   3071 				/* special case for RAID 1, especially
   3072 				   where there are more than 2
   3073 				   components (where RAIDframe treats
   3074 				   things a little differently :( ) */
   3075 				if (parity_type == '1') {
   3076 					if (c%2 == 0) { /* even component */
   3077 						even_pair_failed = 1;
   3078 					} else { /* odd component.  If
   3079                                                     we're failed, and
   3080                                                     so is the even
   3081                                                     component, it's
   3082                                                     "Good Night, Charlie" */
   3083 						if (even_pair_failed == 1) {
   3084 							return(0);
   3085 						}
   3086 					}
   3087 				} else {
   3088 					/* normal accounting */
   3089 					num_missing++;
   3090 				}
   3091 			}
   3092 			if ((parity_type == '1') && (c%2 == 1)) {
   3093 				/* Just did an even component, and we didn't
   3094 				   bail.. reset the even_pair_failed flag,
   3095 				   and go on to the next component.... */
   3096 				even_pair_failed = 0;
   3097 			}
   3098 		}
   3099 	}
   3100 
   3101 	clabel = cset->ac->clabel;
   3102 
   3103 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3104 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3105 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3106 		/* XXX this needs to be made *much* more general */
   3107 		/* Too many failures */
   3108 		return(0);
   3109 	}
   3110 	/* otherwise, all is well, and we've got enough to take a kick
   3111 	   at autoconfiguring this set */
   3112 	return(1);
   3113 }
   3114 
   3115 void
   3116 rf_create_configuration(ac,config,raidPtr)
   3117 	RF_AutoConfig_t *ac;
   3118 	RF_Config_t *config;
   3119 	RF_Raid_t *raidPtr;
   3120 {
   3121 	RF_ComponentLabel_t *clabel;
   3122 	int i;
   3123 
   3124 	clabel = ac->clabel;
   3125 
   3126 	/* 1. Fill in the common stuff */
   3127 	config->numRow = clabel->num_rows;
   3128 	config->numCol = clabel->num_columns;
   3129 	config->numSpare = 0; /* XXX should this be set here? */
   3130 	config->sectPerSU = clabel->sectPerSU;
   3131 	config->SUsPerPU = clabel->SUsPerPU;
   3132 	config->SUsPerRU = clabel->SUsPerRU;
   3133 	config->parityConfig = clabel->parityConfig;
   3134 	/* XXX... */
   3135 	strcpy(config->diskQueueType,"fifo");
   3136 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3137 	config->layoutSpecificSize = 0; /* XXX ?? */
   3138 
   3139 	while(ac!=NULL) {
   3140 		/* row/col values will be in range due to the checks
   3141 		   in reasonable_label() */
   3142 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3143 		       ac->devname);
   3144 		ac = ac->next;
   3145 	}
   3146 
   3147 	for(i=0;i<RF_MAXDBGV;i++) {
   3148 		config->debugVars[i][0] = NULL;
   3149 	}
   3150 }
   3151 
   3152 int
   3153 rf_set_autoconfig(raidPtr, new_value)
   3154 	RF_Raid_t *raidPtr;
   3155 	int new_value;
   3156 {
   3157 	RF_ComponentLabel_t clabel;
   3158 	struct vnode *vp;
   3159 	dev_t dev;
   3160 	int row, column;
   3161 
   3162 	raidPtr->autoconfigure = new_value;
   3163 	for(row=0; row<raidPtr->numRow; row++) {
   3164 		for(column=0; column<raidPtr->numCol; column++) {
   3165 			if (raidPtr->Disks[row][column].status ==
   3166 			    rf_ds_optimal) {
   3167 				dev = raidPtr->Disks[row][column].dev;
   3168 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3169 				raidread_component_label(dev, vp, &clabel);
   3170 				clabel.autoconfigure = new_value;
   3171 				raidwrite_component_label(dev, vp, &clabel);
   3172 			}
   3173 		}
   3174 	}
   3175 	return(new_value);
   3176 }
   3177 
   3178 int
   3179 rf_set_rootpartition(raidPtr, new_value)
   3180 	RF_Raid_t *raidPtr;
   3181 	int new_value;
   3182 {
   3183 	RF_ComponentLabel_t clabel;
   3184 	struct vnode *vp;
   3185 	dev_t dev;
   3186 	int row, column;
   3187 
   3188 	raidPtr->root_partition = new_value;
   3189 	for(row=0; row<raidPtr->numRow; row++) {
   3190 		for(column=0; column<raidPtr->numCol; column++) {
   3191 			if (raidPtr->Disks[row][column].status ==
   3192 			    rf_ds_optimal) {
   3193 				dev = raidPtr->Disks[row][column].dev;
   3194 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3195 				raidread_component_label(dev, vp, &clabel);
   3196 				clabel.root_partition = new_value;
   3197 				raidwrite_component_label(dev, vp, &clabel);
   3198 			}
   3199 		}
   3200 	}
   3201 	return(new_value);
   3202 }
   3203 
   3204 void
   3205 rf_release_all_vps(cset)
   3206 	RF_ConfigSet_t *cset;
   3207 {
   3208 	RF_AutoConfig_t *ac;
   3209 
   3210 	ac = cset->ac;
   3211 	while(ac!=NULL) {
   3212 		/* Close the vp, and give it back */
   3213 		if (ac->vp) {
   3214 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3215 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3216 			vput(ac->vp);
   3217 			ac->vp = NULL;
   3218 		}
   3219 		ac = ac->next;
   3220 	}
   3221 }
   3222 
   3223 
   3224 void
   3225 rf_cleanup_config_set(cset)
   3226 	RF_ConfigSet_t *cset;
   3227 {
   3228 	RF_AutoConfig_t *ac;
   3229 	RF_AutoConfig_t *next_ac;
   3230 
   3231 	ac = cset->ac;
   3232 	while(ac!=NULL) {
   3233 		next_ac = ac->next;
   3234 		/* nuke the label */
   3235 		free(ac->clabel, M_RAIDFRAME);
   3236 		/* cleanup the config structure */
   3237 		free(ac, M_RAIDFRAME);
   3238 		/* "next.." */
   3239 		ac = next_ac;
   3240 	}
   3241 	/* and, finally, nuke the config set */
   3242 	free(cset, M_RAIDFRAME);
   3243 }
   3244 
   3245 
   3246 void
   3247 raid_init_component_label(raidPtr, clabel)
   3248 	RF_Raid_t *raidPtr;
   3249 	RF_ComponentLabel_t *clabel;
   3250 {
   3251 	/* current version number */
   3252 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3253 	clabel->serial_number = raidPtr->serial_number;
   3254 	clabel->mod_counter = raidPtr->mod_counter;
   3255 	clabel->num_rows = raidPtr->numRow;
   3256 	clabel->num_columns = raidPtr->numCol;
   3257 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3258 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3259 
   3260 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3261 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3262 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3263 
   3264 	clabel->blockSize = raidPtr->bytesPerSector;
   3265 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3266 
   3267 	/* XXX not portable */
   3268 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3269 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3270 	clabel->autoconfigure = raidPtr->autoconfigure;
   3271 	clabel->root_partition = raidPtr->root_partition;
   3272 	clabel->last_unit = raidPtr->raidid;
   3273 	clabel->config_order = raidPtr->config_order;
   3274 }
   3275 
   3276 int
   3277 rf_auto_config_set(cset,unit)
   3278 	RF_ConfigSet_t *cset;
   3279 	int *unit;
   3280 {
   3281 	RF_Raid_t *raidPtr;
   3282 	RF_Config_t *config;
   3283 	int raidID;
   3284 	int retcode;
   3285 
   3286 	printf("RAID autoconfigure\n");
   3287 
   3288 	retcode = 0;
   3289 	*unit = -1;
   3290 
   3291 	/* 1. Create a config structure */
   3292 
   3293 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3294 				       M_RAIDFRAME,
   3295 				       M_NOWAIT);
   3296 	if (config==NULL) {
   3297 		printf("Out of mem!?!?\n");
   3298 				/* XXX do something more intelligent here. */
   3299 		return(1);
   3300 	}
   3301 
   3302 	memset(config, 0, sizeof(RF_Config_t));
   3303 
   3304 	/* XXX raidID needs to be set correctly.. */
   3305 
   3306 	/*
   3307 	   2. Figure out what RAID ID this one is supposed to live at
   3308 	   See if we can get the same RAID dev that it was configured
   3309 	   on last time..
   3310 	*/
   3311 
   3312 	raidID = cset->ac->clabel->last_unit;
   3313 	if ((raidID < 0) || (raidID >= numraid)) {
   3314 		/* let's not wander off into lala land. */
   3315 		raidID = numraid - 1;
   3316 	}
   3317 	if (raidPtrs[raidID]->valid != 0) {
   3318 
   3319 		/*
   3320 		   Nope... Go looking for an alternative...
   3321 		   Start high so we don't immediately use raid0 if that's
   3322 		   not taken.
   3323 		*/
   3324 
   3325 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3326 			if (raidPtrs[raidID]->valid == 0) {
   3327 				/* can use this one! */
   3328 				break;
   3329 			}
   3330 		}
   3331 	}
   3332 
   3333 	if (raidID < 0) {
   3334 		/* punt... */
   3335 		printf("Unable to auto configure this set!\n");
   3336 		printf("(Out of RAID devs!)\n");
   3337 		return(1);
   3338 	}
   3339 	printf("Configuring raid%d:\n",raidID);
   3340 	raidPtr = raidPtrs[raidID];
   3341 
   3342 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3343 	raidPtr->raidid = raidID;
   3344 	raidPtr->openings = RAIDOUTSTANDING;
   3345 
   3346 	/* 3. Build the configuration structure */
   3347 	rf_create_configuration(cset->ac, config, raidPtr);
   3348 
   3349 	/* 4. Do the configuration */
   3350 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3351 
   3352 	if (retcode == 0) {
   3353 
   3354 		raidinit(raidPtrs[raidID]);
   3355 
   3356 		rf_markalldirty(raidPtrs[raidID]);
   3357 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3358 		if (cset->ac->clabel->root_partition==1) {
   3359 			/* everything configured just fine.  Make a note
   3360 			   that this set is eligible to be root. */
   3361 			cset->rootable = 1;
   3362 			/* XXX do this here? */
   3363 			raidPtrs[raidID]->root_partition = 1;
   3364 		}
   3365 	}
   3366 
   3367 	/* 5. Cleanup */
   3368 	free(config, M_RAIDFRAME);
   3369 
   3370 	*unit = raidID;
   3371 	return(retcode);
   3372 }
   3373 
   3374 void
   3375 rf_disk_unbusy(desc)
   3376 	RF_RaidAccessDesc_t *desc;
   3377 {
   3378 	struct buf *bp;
   3379 
   3380 	bp = (struct buf *)desc->bp;
   3381 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3382 			    (bp->b_bcount - bp->b_resid));
   3383 }
   3384