Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.136
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.136 2002/09/22 03:56:08 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_netbsdkintf.c -- the NetBSD kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.136 2002/09/22 03:56:08 oster Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_threadstuff.h"
    155 
/*
 * db1_printf -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, e.g.
 *	db1_printf(("unit %d\n", unit));
 * With DEBUG defined, output appears only while rf_kdebug_level is greater
 * than zero; without DEBUG the macro expands to nothing useful.
 *
 * Both variants expand to a single do/while(0) statement so the macro can
 * be used as the body of an unbraced if/else without capturing the else
 * (DEBUG case) or leaving a stray ';' in front of it (non-DEBUG case).
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
    163 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    164 
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    166 
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    170 						 * installation process */
    171 
    172 /* prototypes */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static void raidinit(RF_Raid_t *);
    180 
    181 void raidattach(int);
    182 
    183 dev_type_open(raidopen);
    184 dev_type_close(raidclose);
    185 dev_type_read(raidread);
    186 dev_type_write(raidwrite);
    187 dev_type_ioctl(raidioctl);
    188 dev_type_strategy(raidstrategy);
    189 dev_type_dump(raiddump);
    190 dev_type_size(raidsize);
    191 
    /*
     * Block-device switch table for the raid driver.  It lists the
     * raidopen/raidclose/raidstrategy/raidioctl/raiddump/raidsize entry
     * points declared just above and flags the device as D_DISK.
     */
    192 const struct bdevsw raid_bdevsw = {
    193 	raidopen, raidclose, raidstrategy, raidioctl,
    194 	raiddump, raidsize, D_DISK
    195 };
    196 
    /*
     * Character (raw) device switch table for the raid driver.  Open, close,
     * read, write and ioctl are served by this file; the remaining slots are
     * filled with the kernel's nostop/notty/nopoll/nommap placeholders.
     * The device is flagged as D_DISK.
     */
    197 const struct cdevsw raid_cdevsw = {
    198 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    199 	nostop, notty, nopoll, nommap, D_DISK
    200 };
    201 
    202 /*
    203  * Pilfered from ccd.c
    204  */
    205 
    /*
     * Buffer wrapper for I/O issued to a RAID component.  Objects of this
     * size are what the raidframe_cbufpool pool is initialized for in
     * raidattach().
     *
     * NOTE(review): rf_buf must stay the first member, presumably so that a
     * struct buf pointer can be converted back to the enclosing raidbuf --
     * confirm against the I/O completion code.
     */
    206 struct raidbuf {
    207 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    208 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    209 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    210 };
    211 
    212 /* component buffer pool */
    213 struct pool raidframe_cbufpool;
    214 
    215 /* XXX Not sure if the following should be replacing the raidPtrs above,
    216    or if it should be used in conjunction with that...
    217 */
    218 
    /*
     * Per-unit driver state.  raidattach() allocates and zeroes an array of
     * these (one per requested unit) and the device entry points index it by
     * raidunit(dev) after checking the unit against numraid.
     */
    219 struct raid_softc {
    220 	int     sc_flags;	/* flags */
    221 	int     sc_cflags;	/* configuration flags */
    222 	size_t  sc_size;        /* size of the raid device */
    223 	char    sc_xname[20];	/* XXX external name */
    224 	struct disk sc_dkdev;	/* generic disk device info */
    225 	struct bufq_state buf_queue;	/* used for the device queue */
    226 };
    227 /* sc_flags */
    228 #define RAIDF_INITED	0x01	/* unit has been initialized */
    229 #define RAIDF_WLABEL	0x02	/* label area is writable */
    230 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    231 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    232 #define RAIDF_LOCKED	0x80	/* unit is locked */
    233 
    234 #define	raidunit(x)	DISKUNIT(x)
    235 int numraid = 0;
    236 
    237 /*
    238  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    239  * Be aware that large numbers can allow the driver to consume a lot of
    240  * kernel memory, especially on writes, and in degraded mode reads.
    241  *
    242  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    243  * a single 64K write will typically require 64K for the old data,
    244  * 64K for the old parity, and 64K for the new parity, for a total
    245  * of 192K (if the parity buffer is not re-used immediately).
    246  * Even if it is used immediately, that's still 128K, which when multiplied
    247  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    248  *
    249  * Now in degraded mode, for example, a 64K read on the above setup may
    250  * require data reconstruction, which will require *all* of the 4 remaining
    251  * disks to participate -- 4 * 32K/disk == 128K again.
    252  */
    253 
    254 #ifndef RAIDOUTSTANDING
    255 #define RAIDOUTSTANDING   6
    256 #endif
    257 
    258 #define RAIDLABELDEV(dev)	\
    259 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    260 
    261 /* declared here, and made public, for the benefit of KVM stuff.. */
    262 struct raid_softc *raid_softc;
    263 
    264 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    265 				     struct disklabel *);
    266 static void raidgetdisklabel(dev_t);
    267 static void raidmakedisklabel(struct raid_softc *);
    268 
    269 static int raidlock(struct raid_softc *);
    270 static void raidunlock(struct raid_softc *);
    271 
    272 static void rf_markalldirty(RF_Raid_t *);
    273 
    274 struct device *raidrootdev;
    275 
    276 void rf_ReconThread(struct rf_recon_req *);
    277 /* XXX what I want is: */
    278 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    280 void rf_CopybackThread(RF_Raid_t *raidPtr);
    281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    282 void rf_buildroothack(void *);
    283 
    284 RF_AutoConfig_t *rf_find_raid_components(void);
    285 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    286 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    287 static int rf_reasonable_label(RF_ComponentLabel_t *);
    288 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    289 int rf_set_autoconfig(RF_Raid_t *, int);
    290 int rf_set_rootpartition(RF_Raid_t *, int);
    291 void rf_release_all_vps(RF_ConfigSet_t *);
    292 void rf_cleanup_config_set(RF_ConfigSet_t *);
    293 int rf_have_enough_components(RF_ConfigSet_t *);
    294 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    295 
    296 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    297 				  allow autoconfig to take place.
    298 			          Note that this is overridden by having
    299 			          RAID_AUTOCONFIG as an option in the
    300 			          kernel config file.  */
    301 
    302 void
    303 raidattach(num)
    304 	int     num;
    305 {
    306 	int raidID;
    307 	int i, rc;
    308 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    309 	RF_ConfigSet_t *config_sets;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	numraid = num;
    324 
    325 	/* Make some space for requested number of units... */
    326 
    327 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    328 	if (raidPtrs == NULL) {
    329 		panic("raidPtrs is NULL!!\n");
    330 	}
    331 
    332 	/* Initialize the component buffer pool. */
    333 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    334 	    0, 0, "raidpl", NULL);
    335 
    336 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    337 	if (rc) {
    338 		RF_PANIC();
    339 	}
    340 
    341 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    342 
    343 	for (i = 0; i < num; i++)
    344 		raidPtrs[i] = NULL;
    345 	rc = rf_BootRaidframe();
    346 	if (rc == 0)
    347 		printf("Kernelized RAIDframe activated\n");
    348 	else
    349 		panic("Serious error booting RAID!!\n");
    350 
    351 	/* put together some datastructures like the CCD device does.. This
    352 	 * lets us lock the device and what-not when it gets opened. */
    353 
    354 	raid_softc = (struct raid_softc *)
    355 		malloc(num * sizeof(struct raid_softc),
    356 		       M_RAIDFRAME, M_NOWAIT);
    357 	if (raid_softc == NULL) {
    358 		printf("WARNING: no memory for RAIDframe driver\n");
    359 		return;
    360 	}
    361 
    362 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    363 
    364 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    365 					      M_RAIDFRAME, M_NOWAIT);
    366 	if (raidrootdev == NULL) {
    367 		panic("No memory for RAIDframe driver!!?!?!\n");
    368 	}
    369 
    370 	for (raidID = 0; raidID < num; raidID++) {
    371 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    372 
    373 		raidrootdev[raidID].dv_class  = DV_DISK;
    374 		raidrootdev[raidID].dv_cfdata = NULL;
    375 		raidrootdev[raidID].dv_unit   = raidID;
    376 		raidrootdev[raidID].dv_parent = NULL;
    377 		raidrootdev[raidID].dv_flags  = 0;
    378 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    379 
    380 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    381 			  (RF_Raid_t *));
    382 		if (raidPtrs[raidID] == NULL) {
    383 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    384 			numraid = raidID;
    385 			return;
    386 		}
    387 	}
    388 
    389 #ifdef RAID_AUTOCONFIG
    390 	raidautoconfig = 1;
    391 #endif
    392 
    393 if (raidautoconfig) {
    394 	/* 1. locate all RAID components on the system */
    395 
    396 #if DEBUG
    397 	printf("Searching for raid components...\n");
    398 #endif
    399 	ac_list = rf_find_raid_components();
    400 
    401 	/* 2. sort them into their respective sets */
    402 
    403 	config_sets = rf_create_auto_sets(ac_list);
    404 
    405 	/* 3. evaluate each set and configure the valid ones
    406 	   This gets done in rf_buildroothack() */
    407 
    408 	/* schedule the creation of the thread to do the
    409 	   "/ on RAID" stuff */
    410 
    411 	kthread_create(rf_buildroothack,config_sets);
    412 
    413 }
    414 
    415 }
    416 
    417 void
    418 rf_buildroothack(arg)
    419 	void *arg;
    420 {
    421 	RF_ConfigSet_t *config_sets = arg;
    422 	RF_ConfigSet_t *cset;
    423 	RF_ConfigSet_t *next_cset;
    424 	int retcode;
    425 	int raidID;
    426 	int rootID;
    427 	int num_root;
    428 
    429 	rootID = 0;
    430 	num_root = 0;
    431 	cset = config_sets;
    432 	while(cset != NULL ) {
    433 		next_cset = cset->next;
    434 		if (rf_have_enough_components(cset) &&
    435 		    cset->ac->clabel->autoconfigure==1) {
    436 			retcode = rf_auto_config_set(cset,&raidID);
    437 			if (!retcode) {
    438 				if (cset->rootable) {
    439 					rootID = raidID;
    440 					num_root++;
    441 				}
    442 			} else {
    443 				/* The autoconfig didn't work :( */
    444 #if DEBUG
    445 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    446 #endif
    447 				rf_release_all_vps(cset);
    448 			}
    449 		} else {
    450 			/* we're not autoconfiguring this set...
    451 			   release the associated resources */
    452 			rf_release_all_vps(cset);
    453 		}
    454 		/* cleanup */
    455 		rf_cleanup_config_set(cset);
    456 		cset = next_cset;
    457 	}
    458 
    459 	/* we found something bootable... */
    460 
    461 	if (num_root == 1) {
    462 		booted_device = &raidrootdev[rootID];
    463 	} else if (num_root > 1) {
    464 		/* we can't guess.. require the user to answer... */
    465 		boothowto |= RB_ASKNAME;
    466 	}
    467 }
    468 
    469 
    470 int
    471 raidsize(dev)
    472 	dev_t   dev;
    473 {
    474 	struct raid_softc *rs;
    475 	struct disklabel *lp;
    476 	int     part, unit, omask, size;
    477 
    478 	unit = raidunit(dev);
    479 	if (unit >= numraid)
    480 		return (-1);
    481 	rs = &raid_softc[unit];
    482 
    483 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    484 		return (-1);
    485 
    486 	part = DISKPART(dev);
    487 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    488 	lp = rs->sc_dkdev.dk_label;
    489 
    490 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    491 		return (-1);
    492 
    493 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    494 		size = -1;
    495 	else
    496 		size = lp->d_partitions[part].p_size *
    497 		    (lp->d_secsize / DEV_BSIZE);
    498 
    499 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    500 		return (-1);
    501 
    502 	return (size);
    503 
    504 }
    505 
    506 int
    507 raiddump(dev, blkno, va, size)
    508 	dev_t   dev;
    509 	daddr_t blkno;
    510 	caddr_t va;
    511 	size_t  size;
    512 {
    513 	/* Not implemented. */
    514 	return ENXIO;
    515 }
    516 /* ARGSUSED */
    517 int
    518 raidopen(dev, flags, fmt, p)
    519 	dev_t   dev;
    520 	int     flags, fmt;
    521 	struct proc *p;
    522 {
    523 	int     unit = raidunit(dev);
    524 	struct raid_softc *rs;
    525 	struct disklabel *lp;
    526 	int     part, pmask;
    527 	int     error = 0;
    528 
    529 	if (unit >= numraid)
    530 		return (ENXIO);
    531 	rs = &raid_softc[unit];
    532 
    533 	if ((error = raidlock(rs)) != 0)
    534 		return (error);
    535 	lp = rs->sc_dkdev.dk_label;
    536 
    537 	part = DISKPART(dev);
    538 	pmask = (1 << part);
    539 
    540 	db1_printf(("Opening raid device number: %d partition: %d\n",
    541 		unit, part));
    542 
    543 
    544 	if ((rs->sc_flags & RAIDF_INITED) &&
    545 	    (rs->sc_dkdev.dk_openmask == 0))
    546 		raidgetdisklabel(dev);
    547 
    548 	/* make sure that this partition exists */
    549 
    550 	if (part != RAW_PART) {
    551 		db1_printf(("Not a raw partition..\n"));
    552 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    553 		    ((part >= lp->d_npartitions) ||
    554 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    555 			error = ENXIO;
    556 			raidunlock(rs);
    557 			db1_printf(("Bailing out...\n"));
    558 			return (error);
    559 		}
    560 	}
    561 	/* Prevent this unit from being unconfigured while open. */
    562 	switch (fmt) {
    563 	case S_IFCHR:
    564 		rs->sc_dkdev.dk_copenmask |= pmask;
    565 		break;
    566 
    567 	case S_IFBLK:
    568 		rs->sc_dkdev.dk_bopenmask |= pmask;
    569 		break;
    570 	}
    571 
    572 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    573 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    574 		/* First one... mark things as dirty... Note that we *MUST*
    575 		 have done a configure before this.  I DO NOT WANT TO BE
    576 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    577 		 THAT THEY BELONG TOGETHER!!!!! */
    578 		/* XXX should check to see if we're only open for reading
    579 		   here... If so, we needn't do this, but then need some
    580 		   other way of keeping track of what's happened.. */
    581 
    582 		rf_markalldirty( raidPtrs[unit] );
    583 	}
    584 
    585 
    586 	rs->sc_dkdev.dk_openmask =
    587 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    588 
    589 	raidunlock(rs);
    590 
    591 	return (error);
    592 
    593 
    594 }
    595 /* ARGSUSED */
    596 int
    597 raidclose(dev, flags, fmt, p)
    598 	dev_t   dev;
    599 	int     flags, fmt;
    600 	struct proc *p;
    601 {
    602 	int     unit = raidunit(dev);
    603 	struct raid_softc *rs;
    604 	int     error = 0;
    605 	int     part;
    606 
    607 	if (unit >= numraid)
    608 		return (ENXIO);
    609 	rs = &raid_softc[unit];
    610 
    611 	if ((error = raidlock(rs)) != 0)
    612 		return (error);
    613 
    614 	part = DISKPART(dev);
    615 
    616 	/* ...that much closer to allowing unconfiguration... */
    617 	switch (fmt) {
    618 	case S_IFCHR:
    619 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    620 		break;
    621 
    622 	case S_IFBLK:
    623 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    624 		break;
    625 	}
    626 	rs->sc_dkdev.dk_openmask =
    627 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    628 
    629 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    630 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    631 		/* Last one... device is not unconfigured yet.
    632 		   Device shutdown has taken care of setting the
    633 		   clean bits if RAIDF_INITED is not set
    634 		   mark things as clean... */
    635 #if 0
    636 		printf("Last one on raid%d.  Updating status.\n",unit);
    637 #endif
    638 		rf_update_component_labels(raidPtrs[unit],
    639 						 RF_FINAL_COMPONENT_UPDATE);
    640 		if (doing_shutdown) {
    641 			/* last one, and we're going down, so
    642 			   lights out for this RAID set too. */
    643 			error = rf_Shutdown(raidPtrs[unit]);
    644 
    645 			/* It's no longer initialized... */
    646 			rs->sc_flags &= ~RAIDF_INITED;
    647 
    648 			/* Detach the disk. */
    649 			disk_detach(&rs->sc_dkdev);
    650 		}
    651 	}
    652 
    653 	raidunlock(rs);
    654 	return (0);
    655 
    656 }
    657 
    658 void
    659 raidstrategy(bp)
    660 	struct buf *bp;
    661 {
    662 	int s;
    663 
    664 	unsigned int raidID = raidunit(bp->b_dev);
    665 	RF_Raid_t *raidPtr;
    666 	struct raid_softc *rs = &raid_softc[raidID];
    667 	struct disklabel *lp;
    668 	int     wlabel;
    669 
    670 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    671 		bp->b_error = ENXIO;
    672 		bp->b_flags |= B_ERROR;
    673 		bp->b_resid = bp->b_bcount;
    674 		biodone(bp);
    675 		return;
    676 	}
    677 	if (raidID >= numraid || !raidPtrs[raidID]) {
    678 		bp->b_error = ENODEV;
    679 		bp->b_flags |= B_ERROR;
    680 		bp->b_resid = bp->b_bcount;
    681 		biodone(bp);
    682 		return;
    683 	}
    684 	raidPtr = raidPtrs[raidID];
    685 	if (!raidPtr->valid) {
    686 		bp->b_error = ENODEV;
    687 		bp->b_flags |= B_ERROR;
    688 		bp->b_resid = bp->b_bcount;
    689 		biodone(bp);
    690 		return;
    691 	}
    692 	if (bp->b_bcount == 0) {
    693 		db1_printf(("b_bcount is zero..\n"));
    694 		biodone(bp);
    695 		return;
    696 	}
    697 	lp = rs->sc_dkdev.dk_label;
    698 
    699 	/*
    700 	 * Do bounds checking and adjust transfer.  If there's an
    701 	 * error, the bounds check will flag that for us.
    702 	 */
    703 
    704 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    705 	if (DISKPART(bp->b_dev) != RAW_PART)
    706 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    707 			db1_printf(("Bounds check failed!!:%d %d\n",
    708 				(int) bp->b_blkno, (int) wlabel));
    709 			biodone(bp);
    710 			return;
    711 		}
    712 	s = splbio();
    713 
    714 	bp->b_resid = 0;
    715 
    716 	/* stuff it onto our queue */
    717 	BUFQ_PUT(&rs->buf_queue, bp);
    718 
    719 	raidstart(raidPtrs[raidID]);
    720 
    721 	splx(s);
    722 }
    723 /* ARGSUSED */
    724 int
    725 raidread(dev, uio, flags)
    726 	dev_t   dev;
    727 	struct uio *uio;
    728 	int     flags;
    729 {
    730 	int     unit = raidunit(dev);
    731 	struct raid_softc *rs;
    732 	int     part;
    733 
    734 	if (unit >= numraid)
    735 		return (ENXIO);
    736 	rs = &raid_softc[unit];
    737 
    738 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    739 		return (ENXIO);
    740 	part = DISKPART(dev);
    741 
    742 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    743 
    744 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    745 
    746 }
    747 /* ARGSUSED */
    748 int
    749 raidwrite(dev, uio, flags)
    750 	dev_t   dev;
    751 	struct uio *uio;
    752 	int     flags;
    753 {
    754 	int     unit = raidunit(dev);
    755 	struct raid_softc *rs;
    756 
    757 	if (unit >= numraid)
    758 		return (ENXIO);
    759 	rs = &raid_softc[unit];
    760 
    761 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    762 		return (ENXIO);
    763 	db1_printf(("raidwrite\n"));
    764 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    765 
    766 }
    767 
    768 int
    769 raidioctl(dev, cmd, data, flag, p)
    770 	dev_t   dev;
    771 	u_long  cmd;
    772 	caddr_t data;
    773 	int     flag;
    774 	struct proc *p;
    775 {
    776 	int     unit = raidunit(dev);
    777 	int     error = 0;
    778 	int     part, pmask;
    779 	struct raid_softc *rs;
    780 	RF_Config_t *k_cfg, *u_cfg;
    781 	RF_Raid_t *raidPtr;
    782 	RF_RaidDisk_t *diskPtr;
    783 	RF_AccTotals_t *totals;
    784 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    785 	u_char *specific_buf;
    786 	int retcode = 0;
    787 	int row;
    788 	int column;
    789 	int raidid;
    790 	struct rf_recon_req *rrcopy, *rr;
    791 	RF_ComponentLabel_t *clabel;
    792 	RF_ComponentLabel_t ci_label;
    793 	RF_ComponentLabel_t **clabel_ptr;
    794 	RF_SingleComponent_t *sparePtr,*componentPtr;
    795 	RF_SingleComponent_t hot_spare;
    796 	RF_SingleComponent_t component;
    797 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    798 	int i, j, d;
    799 #ifdef __HAVE_OLD_DISKLABEL
    800 	struct disklabel newlabel;
    801 #endif
    802 
    803 	if (unit >= numraid)
    804 		return (ENXIO);
    805 	rs = &raid_softc[unit];
    806 	raidPtr = raidPtrs[unit];
    807 
    808 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    809 		(int) DISKPART(dev), (int) unit, (int) cmd));
    810 
    811 	/* Must be open for writes for these commands... */
    812 	switch (cmd) {
    813 	case DIOCSDINFO:
    814 	case DIOCWDINFO:
    815 #ifdef __HAVE_OLD_DISKLABEL
    816 	case ODIOCWDINFO:
    817 	case ODIOCSDINFO:
    818 #endif
    819 	case DIOCWLABEL:
    820 		if ((flag & FWRITE) == 0)
    821 			return (EBADF);
    822 	}
    823 
    824 	/* Must be initialized for these... */
    825 	switch (cmd) {
    826 	case DIOCGDINFO:
    827 	case DIOCSDINFO:
    828 	case DIOCWDINFO:
    829 #ifdef __HAVE_OLD_DISKLABEL
    830 	case ODIOCGDINFO:
    831 	case ODIOCWDINFO:
    832 	case ODIOCSDINFO:
    833 	case ODIOCGDEFLABEL:
    834 #endif
    835 	case DIOCGPART:
    836 	case DIOCWLABEL:
    837 	case DIOCGDEFLABEL:
    838 	case RAIDFRAME_SHUTDOWN:
    839 	case RAIDFRAME_REWRITEPARITY:
    840 	case RAIDFRAME_GET_INFO:
    841 	case RAIDFRAME_RESET_ACCTOTALS:
    842 	case RAIDFRAME_GET_ACCTOTALS:
    843 	case RAIDFRAME_KEEP_ACCTOTALS:
    844 	case RAIDFRAME_GET_SIZE:
    845 	case RAIDFRAME_FAIL_DISK:
    846 	case RAIDFRAME_COPYBACK:
    847 	case RAIDFRAME_CHECK_RECON_STATUS:
    848 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    849 	case RAIDFRAME_GET_COMPONENT_LABEL:
    850 	case RAIDFRAME_SET_COMPONENT_LABEL:
    851 	case RAIDFRAME_ADD_HOT_SPARE:
    852 	case RAIDFRAME_REMOVE_HOT_SPARE:
    853 	case RAIDFRAME_INIT_LABELS:
    854 	case RAIDFRAME_REBUILD_IN_PLACE:
    855 	case RAIDFRAME_CHECK_PARITY:
    856 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    857 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    858 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    859 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    860 	case RAIDFRAME_SET_AUTOCONFIG:
    861 	case RAIDFRAME_SET_ROOT:
    862 	case RAIDFRAME_DELETE_COMPONENT:
    863 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    864 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    865 			return (ENXIO);
    866 	}
    867 
    868 	switch (cmd) {
    869 
    870 		/* configure the system */
    871 	case RAIDFRAME_CONFIGURE:
    872 
    873 		if (raidPtr->valid) {
    874 			/* There is a valid RAID set running on this unit! */
    875 			printf("raid%d: Device already configured!\n",unit);
    876 			return(EINVAL);
    877 		}
    878 
    879 		/* copy-in the configuration information */
    880 		/* data points to a pointer to the configuration structure */
    881 
    882 		u_cfg = *((RF_Config_t **) data);
    883 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    884 		if (k_cfg == NULL) {
    885 			return (ENOMEM);
    886 		}
    887 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    888 		    sizeof(RF_Config_t));
    889 		if (retcode) {
    890 			RF_Free(k_cfg, sizeof(RF_Config_t));
    891 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    892 				retcode));
    893 			return (retcode);
    894 		}
    895 		/* allocate a buffer for the layout-specific data, and copy it
    896 		 * in */
    897 		if (k_cfg->layoutSpecificSize) {
    898 			if (k_cfg->layoutSpecificSize > 10000) {
    899 				/* sanity check */
    900 				RF_Free(k_cfg, sizeof(RF_Config_t));
    901 				return (EINVAL);
    902 			}
    903 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    904 			    (u_char *));
    905 			if (specific_buf == NULL) {
    906 				RF_Free(k_cfg, sizeof(RF_Config_t));
    907 				return (ENOMEM);
    908 			}
    909 			retcode = copyin(k_cfg->layoutSpecific,
    910 			    (caddr_t) specific_buf,
    911 			    k_cfg->layoutSpecificSize);
    912 			if (retcode) {
    913 				RF_Free(k_cfg, sizeof(RF_Config_t));
    914 				RF_Free(specific_buf,
    915 					k_cfg->layoutSpecificSize);
    916 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    917 					retcode));
    918 				return (retcode);
    919 			}
    920 		} else
    921 			specific_buf = NULL;
    922 		k_cfg->layoutSpecific = specific_buf;
    923 
    924 		/* should do some kind of sanity check on the configuration.
    925 		 * Store the sum of all the bytes in the last byte? */
    926 
    927 		/* configure the system */
    928 
    929 		/*
    930 		 * Clear the entire RAID descriptor, just to make sure
    931 		 *  there is no stale data left in the case of a
    932 		 *  reconfiguration
    933 		 */
    934 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    935 		raidPtr->raidid = unit;
    936 
    937 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    938 
    939 		if (retcode == 0) {
    940 
    941 			/* allow this many simultaneous IO's to
    942 			   this RAID device */
    943 			raidPtr->openings = RAIDOUTSTANDING;
    944 
    945 			raidinit(raidPtr);
    946 			rf_markalldirty(raidPtr);
    947 		}
    948 		/* free the buffers.  No return code here. */
    949 		if (k_cfg->layoutSpecificSize) {
    950 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    951 		}
    952 		RF_Free(k_cfg, sizeof(RF_Config_t));
    953 
    954 		return (retcode);
    955 
    956 		/* shutdown the system */
    957 	case RAIDFRAME_SHUTDOWN:
    958 
    959 		if ((error = raidlock(rs)) != 0)
    960 			return (error);
    961 
    962 		/*
    963 		 * If somebody has a partition mounted, we shouldn't
    964 		 * shutdown.
    965 		 */
    966 
    967 		part = DISKPART(dev);
    968 		pmask = (1 << part);
    969 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    970 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    971 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    972 			raidunlock(rs);
    973 			return (EBUSY);
    974 		}
    975 
    976 		retcode = rf_Shutdown(raidPtr);
    977 
    978 		/* It's no longer initialized... */
    979 		rs->sc_flags &= ~RAIDF_INITED;
    980 
    981 		/* Detach the disk. */
    982 		disk_detach(&rs->sc_dkdev);
    983 
    984 		raidunlock(rs);
    985 
    986 		return (retcode);
    987 	case RAIDFRAME_GET_COMPONENT_LABEL:
    988 		clabel_ptr = (RF_ComponentLabel_t **) data;
    989 		/* need to read the component label for the disk indicated
    990 		   by row,column in clabel */
    991 
    992 		/* For practice, let's get it directly from disk, rather
    993 		   than from the in-core copy */
    994 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    995 			   (RF_ComponentLabel_t *));
    996 		if (clabel == NULL)
    997 			return (ENOMEM);
    998 
    999 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1000 
   1001 		retcode = copyin( *clabel_ptr, clabel,
   1002 				  sizeof(RF_ComponentLabel_t));
   1003 
   1004 		if (retcode) {
   1005 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1006 			return(retcode);
   1007 		}
   1008 
   1009 		row = clabel->row;
   1010 		column = clabel->column;
   1011 
   1012 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1013 		    (column < 0) || (column >= raidPtr->numCol +
   1014 				     raidPtr->numSpare)) {
   1015 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1016 			return(EINVAL);
   1017 		}
   1018 
   1019 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1020 				raidPtr->raid_cinfo[row][column].ci_vp,
   1021 				clabel );
   1022 
   1023 		retcode = copyout((caddr_t) clabel,
   1024 				  (caddr_t) *clabel_ptr,
   1025 				  sizeof(RF_ComponentLabel_t));
   1026 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1027 		return (retcode);
   1028 
   1029 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1030 		clabel = (RF_ComponentLabel_t *) data;
   1031 
   1032 		/* XXX check the label for valid stuff... */
   1033 		/* Note that some things *should not* get modified --
   1034 		   the user should be re-initing the labels instead of
   1035 		   trying to patch things.
   1036 		   */
   1037 
   1038 		raidid = raidPtr->raidid;
   1039 		printf("raid%d: Got component label:\n", raidid);
   1040 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1041 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1042 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1043 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1044 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1045 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1046 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1047 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1048 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1049 
   1050 		row = clabel->row;
   1051 		column = clabel->column;
   1052 
   1053 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1054 		    (column < 0) || (column >= raidPtr->numCol)) {
   1055 			return(EINVAL);
   1056 		}
   1057 
   1058 		/* XXX this isn't allowed to do anything for now :-) */
   1059 
   1060 		/* XXX and before it is, we need to fill in the rest
   1061 		   of the fields!?!?!?! */
   1062 #if 0
   1063 		raidwrite_component_label(
   1064                             raidPtr->Disks[row][column].dev,
   1065 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1066 			    clabel );
   1067 #endif
   1068 		return (0);
   1069 
   1070 	case RAIDFRAME_INIT_LABELS:
   1071 		clabel = (RF_ComponentLabel_t *) data;
   1072 		/*
   1073 		   we only want the serial number from
   1074 		   the above.  We get all the rest of the information
   1075 		   from the config that was used to create this RAID
   1076 		   set.
   1077 		   */
   1078 
   1079 		raidPtr->serial_number = clabel->serial_number;
   1080 
   1081 		raid_init_component_label(raidPtr, &ci_label);
   1082 		ci_label.serial_number = clabel->serial_number;
   1083 
   1084 		for(row=0;row<raidPtr->numRow;row++) {
   1085 			ci_label.row = row;
   1086 			for(column=0;column<raidPtr->numCol;column++) {
   1087 				diskPtr = &raidPtr->Disks[row][column];
   1088 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1089 					ci_label.partitionSize = diskPtr->partitionSize;
   1090 					ci_label.column = column;
   1091 					raidwrite_component_label(
   1092 					  raidPtr->Disks[row][column].dev,
   1093 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1094 					  &ci_label );
   1095 				}
   1096 			}
   1097 		}
   1098 
   1099 		return (retcode);
   1100 	case RAIDFRAME_SET_AUTOCONFIG:
   1101 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1102 		printf("raid%d: New autoconfig value is: %d\n",
   1103 		       raidPtr->raidid, d);
   1104 		*(int *) data = d;
   1105 		return (retcode);
   1106 
   1107 	case RAIDFRAME_SET_ROOT:
   1108 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1109 		printf("raid%d: New rootpartition value is: %d\n",
   1110 		       raidPtr->raidid, d);
   1111 		*(int *) data = d;
   1112 		return (retcode);
   1113 
   1114 		/* initialize all parity */
   1115 	case RAIDFRAME_REWRITEPARITY:
   1116 
   1117 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1118 			/* Parity for RAID 0 is trivially correct */
   1119 			raidPtr->parity_good = RF_RAID_CLEAN;
   1120 			return(0);
   1121 		}
   1122 
   1123 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1124 			/* Re-write is already in progress! */
   1125 			return(EINVAL);
   1126 		}
   1127 
   1128 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1129 					   rf_RewriteParityThread,
   1130 					   raidPtr,"raid_parity");
   1131 		return (retcode);
   1132 
   1133 
   1134 	case RAIDFRAME_ADD_HOT_SPARE:
   1135 		sparePtr = (RF_SingleComponent_t *) data;
   1136 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1137 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1138 		return(retcode);
   1139 
   1140 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1141 		return(retcode);
   1142 
   1143 	case RAIDFRAME_DELETE_COMPONENT:
   1144 		componentPtr = (RF_SingleComponent_t *)data;
   1145 		memcpy( &component, componentPtr,
   1146 			sizeof(RF_SingleComponent_t));
   1147 		retcode = rf_delete_component(raidPtr, &component);
   1148 		return(retcode);
   1149 
   1150 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1151 		componentPtr = (RF_SingleComponent_t *)data;
   1152 		memcpy( &component, componentPtr,
   1153 			sizeof(RF_SingleComponent_t));
   1154 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1155 		return(retcode);
   1156 
   1157 	case RAIDFRAME_REBUILD_IN_PLACE:
   1158 
   1159 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1160 			/* Can't do this on a RAID 0!! */
   1161 			return(EINVAL);
   1162 		}
   1163 
   1164 		if (raidPtr->recon_in_progress == 1) {
   1165 			/* a reconstruct is already in progress! */
   1166 			return(EINVAL);
   1167 		}
   1168 
   1169 		componentPtr = (RF_SingleComponent_t *) data;
   1170 		memcpy( &component, componentPtr,
   1171 			sizeof(RF_SingleComponent_t));
   1172 		row = component.row;
   1173 		column = component.column;
   1174 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1175 		       row, column);
   1176 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1177 		    (column < 0) || (column >= raidPtr->numCol)) {
   1178 			return(EINVAL);
   1179 		}
   1180 
   1181 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1182 		if (rrcopy == NULL)
   1183 			return(ENOMEM);
   1184 
   1185 		rrcopy->raidPtr = (void *) raidPtr;
   1186 		rrcopy->row = row;
   1187 		rrcopy->col = column;
   1188 
   1189 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1190 					   rf_ReconstructInPlaceThread,
   1191 					   rrcopy,"raid_reconip");
   1192 		return(retcode);
   1193 
   1194 	case RAIDFRAME_GET_INFO:
   1195 		if (!raidPtr->valid)
   1196 			return (ENODEV);
   1197 		ucfgp = (RF_DeviceConfig_t **) data;
   1198 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1199 			  (RF_DeviceConfig_t *));
   1200 		if (d_cfg == NULL)
   1201 			return (ENOMEM);
   1202 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1203 		d_cfg->rows = raidPtr->numRow;
   1204 		d_cfg->cols = raidPtr->numCol;
   1205 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1206 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1207 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1208 			return (ENOMEM);
   1209 		}
   1210 		d_cfg->nspares = raidPtr->numSpare;
   1211 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1212 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1213 			return (ENOMEM);
   1214 		}
   1215 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1216 		d = 0;
   1217 		for (i = 0; i < d_cfg->rows; i++) {
   1218 			for (j = 0; j < d_cfg->cols; j++) {
   1219 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1220 				d++;
   1221 			}
   1222 		}
   1223 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1224 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1225 		}
   1226 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1227 				  sizeof(RF_DeviceConfig_t));
   1228 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1229 
   1230 		return (retcode);
   1231 
   1232 	case RAIDFRAME_CHECK_PARITY:
   1233 		*(int *) data = raidPtr->parity_good;
   1234 		return (0);
   1235 
   1236 	case RAIDFRAME_RESET_ACCTOTALS:
   1237 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1238 		return (0);
   1239 
   1240 	case RAIDFRAME_GET_ACCTOTALS:
   1241 		totals = (RF_AccTotals_t *) data;
   1242 		*totals = raidPtr->acc_totals;
   1243 		return (0);
   1244 
   1245 	case RAIDFRAME_KEEP_ACCTOTALS:
   1246 		raidPtr->keep_acc_totals = *(int *)data;
   1247 		return (0);
   1248 
   1249 	case RAIDFRAME_GET_SIZE:
   1250 		*(int *) data = raidPtr->totalSectors;
   1251 		return (0);
   1252 
   1253 		/* fail a disk & optionally start reconstruction */
   1254 	case RAIDFRAME_FAIL_DISK:
   1255 
   1256 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1257 			/* Can't do this on a RAID 0!! */
   1258 			return(EINVAL);
   1259 		}
   1260 
   1261 		rr = (struct rf_recon_req *) data;
   1262 
   1263 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1264 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1265 			return (EINVAL);
   1266 
   1267 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1268 		       unit, rr->row, rr->col);
   1269 
   1270 		/* make a copy of the recon request so that we don't rely on
   1271 		 * the user's buffer */
   1272 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1273 		if (rrcopy == NULL)
   1274 			return(ENOMEM);
   1275 		memcpy(rrcopy, rr, sizeof(*rr));
   1276 		rrcopy->raidPtr = (void *) raidPtr;
   1277 
   1278 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1279 					   rf_ReconThread,
   1280 					   rrcopy,"raid_recon");
   1281 		return (0);
   1282 
   1283 		/* invoke a copyback operation after recon on whatever disk
   1284 		 * needs it, if any */
   1285 	case RAIDFRAME_COPYBACK:
   1286 
   1287 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1288 			/* This makes no sense on a RAID 0!! */
   1289 			return(EINVAL);
   1290 		}
   1291 
   1292 		if (raidPtr->copyback_in_progress == 1) {
   1293 			/* Copyback is already in progress! */
   1294 			return(EINVAL);
   1295 		}
   1296 
   1297 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1298 					   rf_CopybackThread,
   1299 					   raidPtr,"raid_copyback");
   1300 		return (retcode);
   1301 
   1302 		/* return the percentage completion of reconstruction */
   1303 	case RAIDFRAME_CHECK_RECON_STATUS:
   1304 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1305 			/* This makes no sense on a RAID 0, so tell the
   1306 			   user it's done. */
   1307 			*(int *) data = 100;
   1308 			return(0);
   1309 		}
   1310 		row = 0; /* XXX we only consider a single row... */
   1311 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1312 			*(int *) data = 100;
   1313 		else
   1314 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1315 		return (0);
   1316 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1317 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1318 		row = 0; /* XXX we only consider a single row... */
   1319 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1320 			progressInfo.remaining = 0;
   1321 			progressInfo.completed = 100;
   1322 			progressInfo.total = 100;
   1323 		} else {
   1324 			progressInfo.total =
   1325 				raidPtr->reconControl[row]->numRUsTotal;
   1326 			progressInfo.completed =
   1327 				raidPtr->reconControl[row]->numRUsComplete;
   1328 			progressInfo.remaining = progressInfo.total -
   1329 				progressInfo.completed;
   1330 		}
   1331 		retcode = copyout((caddr_t) &progressInfo,
   1332 				  (caddr_t) *progressInfoPtr,
   1333 				  sizeof(RF_ProgressInfo_t));
   1334 		return (retcode);
   1335 
   1336 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1337 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1338 			/* This makes no sense on a RAID 0, so tell the
   1339 			   user it's done. */
   1340 			*(int *) data = 100;
   1341 			return(0);
   1342 		}
   1343 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1344 			*(int *) data = 100 *
   1345 				raidPtr->parity_rewrite_stripes_done /
   1346 				raidPtr->Layout.numStripe;
   1347 		} else {
   1348 			*(int *) data = 100;
   1349 		}
   1350 		return (0);
   1351 
   1352 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1353 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1354 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1355 			progressInfo.total = raidPtr->Layout.numStripe;
   1356 			progressInfo.completed =
   1357 				raidPtr->parity_rewrite_stripes_done;
   1358 			progressInfo.remaining = progressInfo.total -
   1359 				progressInfo.completed;
   1360 		} else {
   1361 			progressInfo.remaining = 0;
   1362 			progressInfo.completed = 100;
   1363 			progressInfo.total = 100;
   1364 		}
   1365 		retcode = copyout((caddr_t) &progressInfo,
   1366 				  (caddr_t) *progressInfoPtr,
   1367 				  sizeof(RF_ProgressInfo_t));
   1368 		return (retcode);
   1369 
   1370 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1371 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1372 			/* This makes no sense on a RAID 0 */
   1373 			*(int *) data = 100;
   1374 			return(0);
   1375 		}
   1376 		if (raidPtr->copyback_in_progress == 1) {
   1377 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1378 				raidPtr->Layout.numStripe;
   1379 		} else {
   1380 			*(int *) data = 100;
   1381 		}
   1382 		return (0);
   1383 
   1384 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1385 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1386 		if (raidPtr->copyback_in_progress == 1) {
   1387 			progressInfo.total = raidPtr->Layout.numStripe;
   1388 			progressInfo.completed =
   1389 				raidPtr->copyback_stripes_done;
   1390 			progressInfo.remaining = progressInfo.total -
   1391 				progressInfo.completed;
   1392 		} else {
   1393 			progressInfo.remaining = 0;
   1394 			progressInfo.completed = 100;
   1395 			progressInfo.total = 100;
   1396 		}
   1397 		retcode = copyout((caddr_t) &progressInfo,
   1398 				  (caddr_t) *progressInfoPtr,
   1399 				  sizeof(RF_ProgressInfo_t));
   1400 		return (retcode);
   1401 
   1402 		/* the sparetable daemon calls this to wait for the kernel to
   1403 		 * need a spare table. this ioctl does not return until a
   1404 		 * spare table is needed. XXX -- calling mpsleep here in the
   1405 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1406 		 * -- I should either compute the spare table in the kernel,
   1407 		 * or have a different -- XXX XXX -- interface (a different
   1408 		 * character device) for delivering the table     -- XXX */
   1409 #if 0
   1410 	case RAIDFRAME_SPARET_WAIT:
   1411 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1412 		while (!rf_sparet_wait_queue)
   1413 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1414 		waitreq = rf_sparet_wait_queue;
   1415 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1416 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1417 
   1418 		/* structure assignment */
   1419 		*((RF_SparetWait_t *) data) = *waitreq;
   1420 
   1421 		RF_Free(waitreq, sizeof(*waitreq));
   1422 		return (0);
   1423 
   1424 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1425 		 * code in it that will cause the dameon to exit */
   1426 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1427 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1428 		waitreq->fcol = -1;
   1429 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1430 		waitreq->next = rf_sparet_wait_queue;
   1431 		rf_sparet_wait_queue = waitreq;
   1432 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1433 		wakeup(&rf_sparet_wait_queue);
   1434 		return (0);
   1435 
   1436 		/* used by the spare table daemon to deliver a spare table
   1437 		 * into the kernel */
   1438 	case RAIDFRAME_SEND_SPARET:
   1439 
   1440 		/* install the spare table */
   1441 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1442 
   1443 		/* respond to the requestor.  the return status of the spare
   1444 		 * table installation is passed in the "fcol" field */
   1445 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1446 		waitreq->fcol = retcode;
   1447 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1448 		waitreq->next = rf_sparet_resp_queue;
   1449 		rf_sparet_resp_queue = waitreq;
   1450 		wakeup(&rf_sparet_resp_queue);
   1451 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1452 
   1453 		return (retcode);
   1454 #endif
   1455 
   1456 	default:
   1457 		break; /* fall through to the os-specific code below */
   1458 
   1459 	}
   1460 
   1461 	if (!raidPtr->valid)
   1462 		return (EINVAL);
   1463 
   1464 	/*
   1465 	 * Add support for "regular" device ioctls here.
   1466 	 */
   1467 
   1468 	switch (cmd) {
   1469 	case DIOCGDINFO:
   1470 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1471 		break;
   1472 #ifdef __HAVE_OLD_DISKLABEL
   1473 	case ODIOCGDINFO:
   1474 		newlabel = *(rs->sc_dkdev.dk_label);
   1475 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1476 			return ENOTTY;
   1477 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1478 		break;
   1479 #endif
   1480 
   1481 	case DIOCGPART:
   1482 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1483 		((struct partinfo *) data)->part =
   1484 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1485 		break;
   1486 
   1487 	case DIOCWDINFO:
   1488 	case DIOCSDINFO:
   1489 #ifdef __HAVE_OLD_DISKLABEL
   1490 	case ODIOCWDINFO:
   1491 	case ODIOCSDINFO:
   1492 #endif
   1493 	{
   1494 		struct disklabel *lp;
   1495 #ifdef __HAVE_OLD_DISKLABEL
   1496 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1497 			memset(&newlabel, 0, sizeof newlabel);
   1498 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1499 			lp = &newlabel;
   1500 		} else
   1501 #endif
   1502 		lp = (struct disklabel *)data;
   1503 
   1504 		if ((error = raidlock(rs)) != 0)
   1505 			return (error);
   1506 
   1507 		rs->sc_flags |= RAIDF_LABELLING;
   1508 
   1509 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1510 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1511 		if (error == 0) {
   1512 			if (cmd == DIOCWDINFO
   1513 #ifdef __HAVE_OLD_DISKLABEL
   1514 			    || cmd == ODIOCWDINFO
   1515 #endif
   1516 			   )
   1517 				error = writedisklabel(RAIDLABELDEV(dev),
   1518 				    raidstrategy, rs->sc_dkdev.dk_label,
   1519 				    rs->sc_dkdev.dk_cpulabel);
   1520 		}
   1521 		rs->sc_flags &= ~RAIDF_LABELLING;
   1522 
   1523 		raidunlock(rs);
   1524 
   1525 		if (error)
   1526 			return (error);
   1527 		break;
   1528 	}
   1529 
   1530 	case DIOCWLABEL:
   1531 		if (*(int *) data != 0)
   1532 			rs->sc_flags |= RAIDF_WLABEL;
   1533 		else
   1534 			rs->sc_flags &= ~RAIDF_WLABEL;
   1535 		break;
   1536 
   1537 	case DIOCGDEFLABEL:
   1538 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1539 		break;
   1540 
   1541 #ifdef __HAVE_OLD_DISKLABEL
   1542 	case ODIOCGDEFLABEL:
   1543 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1544 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1545 			return ENOTTY;
   1546 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1547 		break;
   1548 #endif
   1549 
   1550 	default:
   1551 		retcode = ENOTTY;
   1552 	}
   1553 	return (retcode);
   1554 
   1555 }
   1556 
   1557 
   1558 /* raidinit -- complete the rest of the initialization for the
   1559    RAIDframe device.  */
   1560 
   1561 
   1562 static void
   1563 raidinit(raidPtr)
   1564 	RF_Raid_t *raidPtr;
   1565 {
   1566 	struct raid_softc *rs;
   1567 	int     unit;
   1568 
   1569 	unit = raidPtr->raidid;
   1570 
   1571 	rs = &raid_softc[unit];
   1572 
   1573 	/* XXX should check return code first... */
   1574 	rs->sc_flags |= RAIDF_INITED;
   1575 
   1576 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1577 
   1578 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1579 
   1580 	/* disk_attach actually creates space for the CPU disklabel, among
   1581 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1582 	 * with disklabels. */
   1583 
   1584 	disk_attach(&rs->sc_dkdev);
   1585 
   1586 	/* XXX There may be a weird interaction here between this, and
   1587 	 * protectedSectors, as used in RAIDframe.  */
   1588 
   1589 	rs->sc_size = raidPtr->totalSectors;
   1590 
   1591 }
   1592 
   1593 /* wake up the daemon & tell it to get us a spare table
   1594  * XXX
   1595  * the entries in the queues should be tagged with the raidPtr
   1596  * so that in the extremely rare case that two recons happen at once,
   1597  * we know for which device were requesting a spare table
   1598  * XXX
   1599  *
   1600  * XXX This code is not currently used. GO
   1601  */
   1602 int
   1603 rf_GetSpareTableFromDaemon(req)
   1604 	RF_SparetWait_t *req;
   1605 {
   1606 	int     retcode;
   1607 
   1608 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1609 	req->next = rf_sparet_wait_queue;
   1610 	rf_sparet_wait_queue = req;
   1611 	wakeup(&rf_sparet_wait_queue);
   1612 
   1613 	/* mpsleep unlocks the mutex */
   1614 	while (!rf_sparet_resp_queue) {
   1615 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1616 		    "raidframe getsparetable", 0);
   1617 	}
   1618 	req = rf_sparet_resp_queue;
   1619 	rf_sparet_resp_queue = req->next;
   1620 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1621 
   1622 	retcode = req->fcol;
   1623 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1624 					 * alloc'd */
   1625 	return (retcode);
   1626 }
   1627 
   1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1629  * bp & passes it down.
   1630  * any calls originating in the kernel must use non-blocking I/O
   1631  * do some extra sanity checking to return "appropriate" error values for
   1632  * certain conditions (to make some standard utilities work)
   1633  *
   1634  * Formerly known as: rf_DoAccessKernel
   1635  */
   1636 void
   1637 raidstart(raidPtr)
   1638 	RF_Raid_t *raidPtr;
   1639 {
   1640 	RF_SectorCount_t num_blocks, pb, sum;
   1641 	RF_RaidAddr_t raid_addr;
   1642 	int     retcode;
   1643 	struct partition *pp;
   1644 	daddr_t blocknum;
   1645 	int     unit;
   1646 	struct raid_softc *rs;
   1647 	int     do_async;
   1648 	struct buf *bp;
   1649 
   1650 	unit = raidPtr->raidid;
   1651 	rs = &raid_softc[unit];
   1652 
   1653 	/* quick check to see if anything has died recently */
   1654 	RF_LOCK_MUTEX(raidPtr->mutex);
   1655 	if (raidPtr->numNewFailures > 0) {
   1656 		rf_update_component_labels(raidPtr,
   1657 					   RF_NORMAL_COMPONENT_UPDATE);
   1658 		raidPtr->numNewFailures--;
   1659 	}
   1660 
   1661 	/* Check to see if we're at the limit... */
   1662 	while (raidPtr->openings > 0) {
   1663 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1664 
   1665 		/* get the next item, if any, from the queue */
   1666 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1667 			/* nothing more to do */
   1668 			return;
   1669 		}
   1670 
   1671 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1672 		 * partition.. Need to make it absolute to the underlying
   1673 		 * device.. */
   1674 
   1675 		blocknum = bp->b_blkno;
   1676 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1677 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1678 			blocknum += pp->p_offset;
   1679 		}
   1680 
   1681 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1682 			    (int) blocknum));
   1683 
   1684 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1685 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1686 
   1687 		/* *THIS* is where we adjust what block we're going to...
   1688 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1689 		raid_addr = blocknum;
   1690 
   1691 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1692 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1693 		sum = raid_addr + num_blocks + pb;
   1694 		if (1 || rf_debugKernelAccess) {
   1695 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1696 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1697 				    (int) pb, (int) bp->b_resid));
   1698 		}
   1699 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1700 		    || (sum < num_blocks) || (sum < pb)) {
   1701 			bp->b_error = ENOSPC;
   1702 			bp->b_flags |= B_ERROR;
   1703 			bp->b_resid = bp->b_bcount;
   1704 			biodone(bp);
   1705 			RF_LOCK_MUTEX(raidPtr->mutex);
   1706 			continue;
   1707 		}
   1708 		/*
   1709 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1710 		 */
   1711 
   1712 		if (bp->b_bcount & raidPtr->sectorMask) {
   1713 			bp->b_error = EINVAL;
   1714 			bp->b_flags |= B_ERROR;
   1715 			bp->b_resid = bp->b_bcount;
   1716 			biodone(bp);
   1717 			RF_LOCK_MUTEX(raidPtr->mutex);
   1718 			continue;
   1719 
   1720 		}
   1721 		db1_printf(("Calling DoAccess..\n"));
   1722 
   1723 
   1724 		RF_LOCK_MUTEX(raidPtr->mutex);
   1725 		raidPtr->openings--;
   1726 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1727 
   1728 		/*
   1729 		 * Everything is async.
   1730 		 */
   1731 		do_async = 1;
   1732 
   1733 		disk_busy(&rs->sc_dkdev);
   1734 
   1735 		/* XXX we're still at splbio() here... do we *really*
   1736 		   need to be? */
   1737 
   1738 		/* don't ever condition on bp->b_flags & B_WRITE.
   1739 		 * always condition on B_READ instead */
   1740 
   1741 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1742 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1743 				      do_async, raid_addr, num_blocks,
   1744 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1745 
   1746 		RF_LOCK_MUTEX(raidPtr->mutex);
   1747 	}
   1748 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1749 }
   1750 
   1751 
   1752 
   1753 
   1754 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1755 
   1756 int
   1757 rf_DispatchKernelIO(queue, req)
   1758 	RF_DiskQueue_t *queue;
   1759 	RF_DiskQueueData_t *req;
   1760 {
   1761 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1762 	struct buf *bp;
   1763 	struct raidbuf *raidbp = NULL;
   1764 
   1765 	req->queue = queue;
   1766 
   1767 #if DIAGNOSTIC
   1768 	if (queue->raidPtr->raidid >= numraid) {
   1769 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1770 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1771 	}
   1772 #endif
   1773 
   1774 	bp = req->bp;
   1775 #if 1
   1776 	/* XXX when there is a physical disk failure, someone is passing us a
   1777 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1778 	 * without taking a performance hit... (not sure where the real bug
   1779 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1780 
   1781 	if (bp->b_flags & B_ERROR) {
   1782 		bp->b_flags &= ~B_ERROR;
   1783 	}
   1784 	if (bp->b_error != 0) {
   1785 		bp->b_error = 0;
   1786 	}
   1787 #endif
   1788 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1789 
   1790 	/*
   1791 	 * context for raidiodone
   1792 	 */
   1793 	raidbp->rf_obp = bp;
   1794 	raidbp->req = req;
   1795 
   1796 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1797 
   1798 	switch (req->type) {
   1799 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1800 		/* XXX need to do something extra here.. */
   1801 		/* I'm leaving this in, as I've never actually seen it used,
   1802 		 * and I'd like folks to report it... GO */
   1803 		printf(("WAKEUP CALLED\n"));
   1804 		queue->numOutstanding++;
   1805 
   1806 		/* XXX need to glue the original buffer into this??  */
   1807 
   1808 		KernelWakeupFunc(&raidbp->rf_buf);
   1809 		break;
   1810 
   1811 	case RF_IO_TYPE_READ:
   1812 	case RF_IO_TYPE_WRITE:
   1813 
   1814 		if (req->tracerec) {
   1815 			RF_ETIMER_START(req->tracerec->timer);
   1816 		}
   1817 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1818 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1819 		    req->sectorOffset, req->numSector,
   1820 		    req->buf, KernelWakeupFunc, (void *) req,
   1821 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1822 
   1823 		if (rf_debugKernelAccess) {
   1824 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1825 				(long) bp->b_blkno));
   1826 		}
   1827 		queue->numOutstanding++;
   1828 		queue->last_deq_sector = req->sectorOffset;
   1829 		/* acc wouldn't have been let in if there were any pending
   1830 		 * reqs at any other priority */
   1831 		queue->curPriority = req->priority;
   1832 
   1833 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1834 			    req->type, queue->raidPtr->raidid,
   1835 			    queue->row, queue->col));
   1836 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1837 			(int) req->sectorOffset, (int) req->numSector,
   1838 			(int) (req->numSector <<
   1839 			    queue->raidPtr->logBytesPerSector),
   1840 			(int) queue->raidPtr->logBytesPerSector));
   1841 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1842 			raidbp->rf_buf.b_vp->v_numoutput++;
   1843 		}
   1844 		VOP_STRATEGY(&raidbp->rf_buf);
   1845 
   1846 		break;
   1847 
   1848 	default:
   1849 		panic("bad req->type in rf_DispatchKernelIO");
   1850 	}
   1851 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1852 
   1853 	return (0);
   1854 }
   1855 /* this is the callback function associated with a I/O invoked from
   1856    kernel code.
   1857  */
   1858 static void
   1859 KernelWakeupFunc(vbp)
   1860 	struct buf *vbp;
   1861 {
   1862 	RF_DiskQueueData_t *req = NULL;
   1863 	RF_DiskQueue_t *queue;
   1864 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1865 	struct buf *bp;
   1866 	int s;
   1867 
   1868 	s = splbio();
   1869 	db1_printf(("recovering the request queue:\n"));
   1870 	req = raidbp->req;
   1871 
   1872 	bp = raidbp->rf_obp;
   1873 
   1874 	queue = (RF_DiskQueue_t *) req->queue;
   1875 
   1876 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1877 		bp->b_flags |= B_ERROR;
   1878 		bp->b_error = raidbp->rf_buf.b_error ?
   1879 		    raidbp->rf_buf.b_error : EIO;
   1880 	}
   1881 
   1882 	/* XXX methinks this could be wrong... */
   1883 #if 1
   1884 	bp->b_resid = raidbp->rf_buf.b_resid;
   1885 #endif
   1886 
   1887 	if (req->tracerec) {
   1888 		RF_ETIMER_STOP(req->tracerec->timer);
   1889 		RF_ETIMER_EVAL(req->tracerec->timer);
   1890 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1891 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1892 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1893 		req->tracerec->num_phys_ios++;
   1894 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1895 	}
   1896 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1897 
   1898 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1899 	 * ballistic, and mark the component as hosed... */
   1900 
   1901 	if (bp->b_flags & B_ERROR) {
   1902 		/* Mark the disk as dead */
   1903 		/* but only mark it once... */
   1904 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1905 		    rf_ds_optimal) {
   1906 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1907 			       queue->raidPtr->raidid,
   1908 			       queue->raidPtr->Disks[queue->row][queue->col].devname);
   1909 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1910 			    rf_ds_failed;
   1911 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1912 			queue->raidPtr->numFailures++;
   1913 			queue->raidPtr->numNewFailures++;
   1914 		} else {	/* Disk is already dead... */
   1915 			/* printf("Disk already marked as dead!\n"); */
   1916 		}
   1917 
   1918 	}
   1919 
   1920 	pool_put(&raidframe_cbufpool, raidbp);
   1921 
   1922 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1923 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1924 
   1925 	splx(s);
   1926 }
   1927 
   1928 
   1929 
   1930 /*
   1931  * initialize a buf structure for doing an I/O in the kernel.
   1932  */
   1933 static void
   1934 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1935        logBytesPerSector, b_proc)
   1936 	struct buf *bp;
   1937 	struct vnode *b_vp;
   1938 	unsigned rw_flag;
   1939 	dev_t dev;
   1940 	RF_SectorNum_t startSect;
   1941 	RF_SectorCount_t numSect;
   1942 	caddr_t buf;
   1943 	void (*cbFunc) (struct buf *);
   1944 	void *cbArg;
   1945 	int logBytesPerSector;
   1946 	struct proc *b_proc;
   1947 {
   1948 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1949 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1950 	bp->b_bcount = numSect << logBytesPerSector;
   1951 	bp->b_bufsize = bp->b_bcount;
   1952 	bp->b_error = 0;
   1953 	bp->b_dev = dev;
   1954 	bp->b_data = buf;
   1955 	bp->b_blkno = startSect;
   1956 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1957 	if (bp->b_bcount == 0) {
   1958 		panic("bp->b_bcount is zero in InitBP!!\n");
   1959 	}
   1960 	bp->b_proc = b_proc;
   1961 	bp->b_iodone = cbFunc;
   1962 	bp->b_vp = b_vp;
   1963 
   1964 }
   1965 
   1966 static void
   1967 raidgetdefaultlabel(raidPtr, rs, lp)
   1968 	RF_Raid_t *raidPtr;
   1969 	struct raid_softc *rs;
   1970 	struct disklabel *lp;
   1971 {
   1972 	db1_printf(("Building a default label...\n"));
   1973 	memset(lp, 0, sizeof(*lp));
   1974 
   1975 	/* fabricate a label... */
   1976 	lp->d_secperunit = raidPtr->totalSectors;
   1977 	lp->d_secsize = raidPtr->bytesPerSector;
   1978 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1979 	lp->d_ntracks = 4 * raidPtr->numCol;
   1980 	lp->d_ncylinders = raidPtr->totalSectors /
   1981 		(lp->d_nsectors * lp->d_ntracks);
   1982 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1983 
   1984 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1985 	lp->d_type = DTYPE_RAID;
   1986 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1987 	lp->d_rpm = 3600;
   1988 	lp->d_interleave = 1;
   1989 	lp->d_flags = 0;
   1990 
   1991 	lp->d_partitions[RAW_PART].p_offset = 0;
   1992 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1993 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1994 	lp->d_npartitions = RAW_PART + 1;
   1995 
   1996 	lp->d_magic = DISKMAGIC;
   1997 	lp->d_magic2 = DISKMAGIC;
   1998 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1999 
   2000 }
   2001 /*
   2002  * Read the disklabel from the raid device.  If one is not present, fake one
   2003  * up.
   2004  */
   2005 static void
   2006 raidgetdisklabel(dev)
   2007 	dev_t   dev;
   2008 {
   2009 	int     unit = raidunit(dev);
   2010 	struct raid_softc *rs = &raid_softc[unit];
   2011 	char   *errstring;
   2012 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2013 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2014 	RF_Raid_t *raidPtr;
   2015 
   2016 	db1_printf(("Getting the disklabel...\n"));
   2017 
   2018 	memset(clp, 0, sizeof(*clp));
   2019 
   2020 	raidPtr = raidPtrs[unit];
   2021 
   2022 	raidgetdefaultlabel(raidPtr, rs, lp);
   2023 
   2024 	/*
   2025 	 * Call the generic disklabel extraction routine.
   2026 	 */
   2027 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2028 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2029 	if (errstring)
   2030 		raidmakedisklabel(rs);
   2031 	else {
   2032 		int     i;
   2033 		struct partition *pp;
   2034 
   2035 		/*
   2036 		 * Sanity check whether the found disklabel is valid.
   2037 		 *
   2038 		 * This is necessary since total size of the raid device
   2039 		 * may vary when an interleave is changed even though exactly
   2040 		 * same componets are used, and old disklabel may used
   2041 		 * if that is found.
   2042 		 */
   2043 		if (lp->d_secperunit != rs->sc_size)
   2044 			printf("raid%d: WARNING: %s: "
   2045 			    "total sector size in disklabel (%d) != "
   2046 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2047 			    lp->d_secperunit, (long) rs->sc_size);
   2048 		for (i = 0; i < lp->d_npartitions; i++) {
   2049 			pp = &lp->d_partitions[i];
   2050 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2051 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2052 				       "exceeds the size of raid (%ld)\n",
   2053 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2054 		}
   2055 	}
   2056 
   2057 }
   2058 /*
   2059  * Take care of things one might want to take care of in the event
   2060  * that a disklabel isn't present.
   2061  */
   2062 static void
   2063 raidmakedisklabel(rs)
   2064 	struct raid_softc *rs;
   2065 {
   2066 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2067 	db1_printf(("Making a label..\n"));
   2068 
   2069 	/*
   2070 	 * For historical reasons, if there's no disklabel present
   2071 	 * the raw partition must be marked FS_BSDFFS.
   2072 	 */
   2073 
   2074 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2075 
   2076 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2077 
   2078 	lp->d_checksum = dkcksum(lp);
   2079 }
   2080 /*
   2081  * Lookup the provided name in the filesystem.  If the file exists,
   2082  * is a valid block device, and isn't being used by anyone else,
   2083  * set *vpp to the file's vnode.
   2084  * You'll find the original of this in ccd.c
   2085  */
   2086 int
   2087 raidlookup(path, p, vpp)
   2088 	char   *path;
   2089 	struct proc *p;
   2090 	struct vnode **vpp;	/* result */
   2091 {
   2092 	struct nameidata nd;
   2093 	struct vnode *vp;
   2094 	struct vattr va;
   2095 	int     error;
   2096 
   2097 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2098 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2099 #if 0
   2100 		printf("RAIDframe: vn_open returned %d\n", error);
   2101 #endif
   2102 		return (error);
   2103 	}
   2104 	vp = nd.ni_vp;
   2105 	if (vp->v_usecount > 1) {
   2106 		VOP_UNLOCK(vp, 0);
   2107 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2108 		return (EBUSY);
   2109 	}
   2110 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2111 		VOP_UNLOCK(vp, 0);
   2112 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2113 		return (error);
   2114 	}
   2115 	/* XXX: eventually we should handle VREG, too. */
   2116 	if (va.va_type != VBLK) {
   2117 		VOP_UNLOCK(vp, 0);
   2118 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2119 		return (ENOTBLK);
   2120 	}
   2121 	VOP_UNLOCK(vp, 0);
   2122 	*vpp = vp;
   2123 	return (0);
   2124 }
   2125 /*
   2126  * Wait interruptibly for an exclusive lock.
   2127  *
   2128  * XXX
   2129  * Several drivers do this; it should be abstracted and made MP-safe.
   2130  * (Hmm... where have we seen this warning before :->  GO )
   2131  */
   2132 static int
   2133 raidlock(rs)
   2134 	struct raid_softc *rs;
   2135 {
   2136 	int     error;
   2137 
   2138 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2139 		rs->sc_flags |= RAIDF_WANTED;
   2140 		if ((error =
   2141 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2142 			return (error);
   2143 	}
   2144 	rs->sc_flags |= RAIDF_LOCKED;
   2145 	return (0);
   2146 }
   2147 /*
   2148  * Unlock and wake up any waiters.
   2149  */
   2150 static void
   2151 raidunlock(rs)
   2152 	struct raid_softc *rs;
   2153 {
   2154 
   2155 	rs->sc_flags &= ~RAIDF_LOCKED;
   2156 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2157 		rs->sc_flags &= ~RAIDF_WANTED;
   2158 		wakeup(rs);
   2159 	}
   2160 }
   2161 
   2162 
   2163 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2164 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2165 
   2166 int
   2167 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2168 {
   2169 	RF_ComponentLabel_t clabel;
   2170 	raidread_component_label(dev, b_vp, &clabel);
   2171 	clabel.mod_counter = mod_counter;
   2172 	clabel.clean = RF_RAID_CLEAN;
   2173 	raidwrite_component_label(dev, b_vp, &clabel);
   2174 	return(0);
   2175 }
   2176 
   2177 
   2178 int
   2179 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2180 {
   2181 	RF_ComponentLabel_t clabel;
   2182 	raidread_component_label(dev, b_vp, &clabel);
   2183 	clabel.mod_counter = mod_counter;
   2184 	clabel.clean = RF_RAID_DIRTY;
   2185 	raidwrite_component_label(dev, b_vp, &clabel);
   2186 	return(0);
   2187 }
   2188 
   2189 /* ARGSUSED */
   2190 int
   2191 raidread_component_label(dev, b_vp, clabel)
   2192 	dev_t dev;
   2193 	struct vnode *b_vp;
   2194 	RF_ComponentLabel_t *clabel;
   2195 {
   2196 	struct buf *bp;
   2197 	const struct bdevsw *bdev;
   2198 	int error;
   2199 
   2200 	/* XXX should probably ensure that we don't try to do this if
   2201 	   someone has changed rf_protected_sectors. */
   2202 
   2203 	if (b_vp == NULL) {
   2204 		/* For whatever reason, this component is not valid.
   2205 		   Don't try to read a component label from it. */
   2206 		return(EINVAL);
   2207 	}
   2208 
   2209 	/* get a block of the appropriate size... */
   2210 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2211 	bp->b_dev = dev;
   2212 
   2213 	/* get our ducks in a row for the read */
   2214 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2215 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2216 	bp->b_flags |= B_READ;
   2217  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2218 
   2219 	bdev = bdevsw_lookup(bp->b_dev);
   2220 	if (bdev == NULL)
   2221 		return (ENXIO);
   2222 	(*bdev->d_strategy)(bp);
   2223 
   2224 	error = biowait(bp);
   2225 
   2226 	if (!error) {
   2227 		memcpy(clabel, bp->b_data,
   2228 		       sizeof(RF_ComponentLabel_t));
   2229 #if 0
   2230 		rf_print_component_label( clabel );
   2231 #endif
   2232         } else {
   2233 #if 0
   2234 		printf("Failed to read RAID component label!\n");
   2235 #endif
   2236 	}
   2237 
   2238 	brelse(bp);
   2239 	return(error);
   2240 }
   2241 /* ARGSUSED */
   2242 int
   2243 raidwrite_component_label(dev, b_vp, clabel)
   2244 	dev_t dev;
   2245 	struct vnode *b_vp;
   2246 	RF_ComponentLabel_t *clabel;
   2247 {
   2248 	struct buf *bp;
   2249 	const struct bdevsw *bdev;
   2250 	int error;
   2251 
   2252 	/* get a block of the appropriate size... */
   2253 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2254 	bp->b_dev = dev;
   2255 
   2256 	/* get our ducks in a row for the write */
   2257 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2258 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2259 	bp->b_flags |= B_WRITE;
   2260  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2261 
   2262 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2263 
   2264 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2265 
   2266 	bdev = bdevsw_lookup(bp->b_dev);
   2267 	if (bdev == NULL)
   2268 		return (ENXIO);
   2269 	(*bdev->d_strategy)(bp);
   2270 	error = biowait(bp);
   2271 	brelse(bp);
   2272 	if (error) {
   2273 #if 1
   2274 		printf("Failed to write RAID component info!\n");
   2275 #endif
   2276 	}
   2277 
   2278 	return(error);
   2279 }
   2280 
   2281 void
   2282 rf_markalldirty(raidPtr)
   2283 	RF_Raid_t *raidPtr;
   2284 {
   2285 	RF_ComponentLabel_t clabel;
   2286 	int r,c;
   2287 
   2288 	raidPtr->mod_counter++;
   2289 	for (r = 0; r < raidPtr->numRow; r++) {
   2290 		for (c = 0; c < raidPtr->numCol; c++) {
   2291 			/* we don't want to touch (at all) a disk that has
   2292 			   failed */
   2293 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2294 				raidread_component_label(
   2295 					raidPtr->Disks[r][c].dev,
   2296 					raidPtr->raid_cinfo[r][c].ci_vp,
   2297 					&clabel);
   2298 				if (clabel.status == rf_ds_spared) {
   2299 					/* XXX do something special...
   2300 					 but whatever you do, don't
   2301 					 try to access it!! */
   2302 				} else {
   2303 #if 0
   2304 				clabel.status =
   2305 					raidPtr->Disks[r][c].status;
   2306 				raidwrite_component_label(
   2307 					raidPtr->Disks[r][c].dev,
   2308 					raidPtr->raid_cinfo[r][c].ci_vp,
   2309 					&clabel);
   2310 #endif
   2311 				raidmarkdirty(
   2312 				       raidPtr->Disks[r][c].dev,
   2313 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2314 				       raidPtr->mod_counter);
   2315 				}
   2316 			}
   2317 		}
   2318 	}
   2319 	/* printf("Component labels marked dirty.\n"); */
   2320 #if 0
   2321 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2322 		sparecol = raidPtr->numCol + c;
   2323 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2324 			/*
   2325 
   2326 			   XXX this is where we get fancy and map this spare
   2327 			   into it's correct spot in the array.
   2328 
   2329 			 */
   2330 			/*
   2331 
   2332 			   we claim this disk is "optimal" if it's
   2333 			   rf_ds_used_spare, as that means it should be
   2334 			   directly substitutable for the disk it replaced.
   2335 			   We note that too...
   2336 
   2337 			 */
   2338 
   2339 			for(i=0;i<raidPtr->numRow;i++) {
   2340 				for(j=0;j<raidPtr->numCol;j++) {
   2341 					if ((raidPtr->Disks[i][j].spareRow ==
   2342 					     r) &&
   2343 					    (raidPtr->Disks[i][j].spareCol ==
   2344 					     sparecol)) {
   2345 						srow = r;
   2346 						scol = sparecol;
   2347 						break;
   2348 					}
   2349 				}
   2350 			}
   2351 
   2352 			raidread_component_label(
   2353 				      raidPtr->Disks[r][sparecol].dev,
   2354 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2355 				      &clabel);
   2356 			/* make sure status is noted */
   2357 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2358 			clabel.mod_counter = raidPtr->mod_counter;
   2359 			clabel.serial_number = raidPtr->serial_number;
   2360 			clabel.row = srow;
   2361 			clabel.column = scol;
   2362 			clabel.num_rows = raidPtr->numRow;
   2363 			clabel.num_columns = raidPtr->numCol;
   2364 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2365 			clabel.status = rf_ds_optimal;
   2366 			raidwrite_component_label(
   2367 				      raidPtr->Disks[r][sparecol].dev,
   2368 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2369 				      &clabel);
   2370 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2371 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2372 		}
   2373 	}
   2374 
   2375 #endif
   2376 }
   2377 
   2378 
   2379 void
   2380 rf_update_component_labels(raidPtr, final)
   2381 	RF_Raid_t *raidPtr;
   2382 	int final;
   2383 {
   2384 	RF_ComponentLabel_t clabel;
   2385 	int sparecol;
   2386 	int r,c;
   2387 	int i,j;
   2388 	int srow, scol;
   2389 
   2390 	srow = -1;
   2391 	scol = -1;
   2392 
   2393 	/* XXX should do extra checks to make sure things really are clean,
   2394 	   rather than blindly setting the clean bit... */
   2395 
   2396 	raidPtr->mod_counter++;
   2397 
   2398 	for (r = 0; r < raidPtr->numRow; r++) {
   2399 		for (c = 0; c < raidPtr->numCol; c++) {
   2400 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2401 				raidread_component_label(
   2402 					raidPtr->Disks[r][c].dev,
   2403 					raidPtr->raid_cinfo[r][c].ci_vp,
   2404 					&clabel);
   2405 				/* make sure status is noted */
   2406 				clabel.status = rf_ds_optimal;
   2407 				/* bump the counter */
   2408 				clabel.mod_counter = raidPtr->mod_counter;
   2409 
   2410 				raidwrite_component_label(
   2411 					raidPtr->Disks[r][c].dev,
   2412 					raidPtr->raid_cinfo[r][c].ci_vp,
   2413 					&clabel);
   2414 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2415 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2416 						raidmarkclean(
   2417 							      raidPtr->Disks[r][c].dev,
   2418 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2419 							      raidPtr->mod_counter);
   2420 					}
   2421 				}
   2422 			}
   2423 			/* else we don't touch it.. */
   2424 		}
   2425 	}
   2426 
   2427 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2428 		sparecol = raidPtr->numCol + c;
   2429 		/* Need to ensure that the reconstruct actually completed! */
   2430 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2431 			/*
   2432 
   2433 			   we claim this disk is "optimal" if it's
   2434 			   rf_ds_used_spare, as that means it should be
   2435 			   directly substitutable for the disk it replaced.
   2436 			   We note that too...
   2437 
   2438 			 */
   2439 
   2440 			for(i=0;i<raidPtr->numRow;i++) {
   2441 				for(j=0;j<raidPtr->numCol;j++) {
   2442 					if ((raidPtr->Disks[i][j].spareRow ==
   2443 					     0) &&
   2444 					    (raidPtr->Disks[i][j].spareCol ==
   2445 					     sparecol)) {
   2446 						srow = i;
   2447 						scol = j;
   2448 						break;
   2449 					}
   2450 				}
   2451 			}
   2452 
   2453 			/* XXX shouldn't *really* need this... */
   2454 			raidread_component_label(
   2455 				      raidPtr->Disks[0][sparecol].dev,
   2456 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2457 				      &clabel);
   2458 			/* make sure status is noted */
   2459 
   2460 			raid_init_component_label(raidPtr, &clabel);
   2461 
   2462 			clabel.mod_counter = raidPtr->mod_counter;
   2463 			clabel.row = srow;
   2464 			clabel.column = scol;
   2465 			clabel.status = rf_ds_optimal;
   2466 
   2467 			raidwrite_component_label(
   2468 				      raidPtr->Disks[0][sparecol].dev,
   2469 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2470 				      &clabel);
   2471 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2472 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2473 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2474 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2475 						       raidPtr->mod_counter);
   2476 				}
   2477 			}
   2478 		}
   2479 	}
   2480 	/* 	printf("Component labels updated\n"); */
   2481 }
   2482 
   2483 void
   2484 rf_close_component(raidPtr, vp, auto_configured)
   2485 	RF_Raid_t *raidPtr;
   2486 	struct vnode *vp;
   2487 	int auto_configured;
   2488 {
   2489 	struct proc *p;
   2490 
   2491 	p = raidPtr->engine_thread;
   2492 
   2493 	if (vp != NULL) {
   2494 		if (auto_configured == 1) {
   2495 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2496 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2497 			vput(vp);
   2498 
   2499 		} else {
   2500 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2501 		}
   2502 	} else {
   2503 #if 0
   2504 		printf("vnode was NULL\n");
   2505 #endif
   2506 	}
   2507 }
   2508 
   2509 
   2510 void
   2511 rf_UnconfigureVnodes(raidPtr)
   2512 	RF_Raid_t *raidPtr;
   2513 {
   2514 	int r,c;
   2515 	struct proc *p;
   2516 	struct vnode *vp;
   2517 	int acd;
   2518 
   2519 
   2520 	/* We take this opportunity to close the vnodes like we should.. */
   2521 
   2522 	p = raidPtr->engine_thread;
   2523 
   2524 	for (r = 0; r < raidPtr->numRow; r++) {
   2525 		for (c = 0; c < raidPtr->numCol; c++) {
   2526 #if 0
   2527 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2528 			       raidPtr->raidid, r, c);
   2529 #endif
   2530 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2531 			acd = raidPtr->Disks[r][c].auto_configured;
   2532 			rf_close_component(raidPtr, vp, acd);
   2533 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2534 			raidPtr->Disks[r][c].auto_configured = 0;
   2535 		}
   2536 	}
   2537 	for (r = 0; r < raidPtr->numSpare; r++) {
   2538 #if 0
   2539 		printf("raid%d: Closing vnode for spare: %d\n",
   2540 		       raidPtr->raidid, r);
   2541 #endif
   2542 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2543 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2544 		rf_close_component(raidPtr, vp, acd);
   2545 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2546 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2547 	}
   2548 }
   2549 
   2550 
   2551 void
   2552 rf_ReconThread(req)
   2553 	struct rf_recon_req *req;
   2554 {
   2555 	int     s;
   2556 	RF_Raid_t *raidPtr;
   2557 
   2558 	s = splbio();
   2559 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2560 	raidPtr->recon_in_progress = 1;
   2561 
   2562 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2563 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2564 
   2565 	/* XXX get rid of this! we don't need it at all.. */
   2566 	RF_Free(req, sizeof(*req));
   2567 
   2568 	raidPtr->recon_in_progress = 0;
   2569 	splx(s);
   2570 
   2571 	/* That's all... */
   2572 	kthread_exit(0);        /* does not return */
   2573 }
   2574 
   2575 void
   2576 rf_RewriteParityThread(raidPtr)
   2577 	RF_Raid_t *raidPtr;
   2578 {
   2579 	int retcode;
   2580 	int s;
   2581 
   2582 	raidPtr->parity_rewrite_in_progress = 1;
   2583 	s = splbio();
   2584 	retcode = rf_RewriteParity(raidPtr);
   2585 	splx(s);
   2586 	if (retcode) {
   2587 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2588 	} else {
   2589 		/* set the clean bit!  If we shutdown correctly,
   2590 		   the clean bit on each component label will get
   2591 		   set */
   2592 		raidPtr->parity_good = RF_RAID_CLEAN;
   2593 	}
   2594 	raidPtr->parity_rewrite_in_progress = 0;
   2595 
   2596 	/* Anyone waiting for us to stop?  If so, inform them... */
   2597 	if (raidPtr->waitShutdown) {
   2598 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2599 	}
   2600 
   2601 	/* That's all... */
   2602 	kthread_exit(0);        /* does not return */
   2603 }
   2604 
   2605 
   2606 void
   2607 rf_CopybackThread(raidPtr)
   2608 	RF_Raid_t *raidPtr;
   2609 {
   2610 	int s;
   2611 
   2612 	raidPtr->copyback_in_progress = 1;
   2613 	s = splbio();
   2614 	rf_CopybackReconstructedData(raidPtr);
   2615 	splx(s);
   2616 	raidPtr->copyback_in_progress = 0;
   2617 
   2618 	/* That's all... */
   2619 	kthread_exit(0);        /* does not return */
   2620 }
   2621 
   2622 
   2623 void
   2624 rf_ReconstructInPlaceThread(req)
   2625 	struct rf_recon_req *req;
   2626 {
   2627 	int retcode;
   2628 	int s;
   2629 	RF_Raid_t *raidPtr;
   2630 
   2631 	s = splbio();
   2632 	raidPtr = req->raidPtr;
   2633 	raidPtr->recon_in_progress = 1;
   2634 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2635 	RF_Free(req, sizeof(*req));
   2636 	raidPtr->recon_in_progress = 0;
   2637 	splx(s);
   2638 
   2639 	/* That's all... */
   2640 	kthread_exit(0);        /* does not return */
   2641 }
   2642 
   2643 RF_AutoConfig_t *
   2644 rf_find_raid_components()
   2645 {
   2646 	struct vnode *vp;
   2647 	struct disklabel label;
   2648 	struct device *dv;
   2649 	dev_t dev;
   2650 	int bmajor;
   2651 	int error;
   2652 	int i;
   2653 	int good_one;
   2654 	RF_ComponentLabel_t *clabel;
   2655 	RF_AutoConfig_t *ac_list;
   2656 	RF_AutoConfig_t *ac;
   2657 
   2658 
   2659 	/* initialize the AutoConfig list */
   2660 	ac_list = NULL;
   2661 
   2662 	/* we begin by trolling through *all* the devices on the system */
   2663 
   2664 	for (dv = alldevs.tqh_first; dv != NULL;
   2665 	     dv = dv->dv_list.tqe_next) {
   2666 
   2667 		/* we are only interested in disks... */
   2668 		if (dv->dv_class != DV_DISK)
   2669 			continue;
   2670 
   2671 		/* we don't care about floppies... */
   2672 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2673 			continue;
   2674 		}
   2675 
   2676 		/* we don't care about CD's... */
   2677 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
   2678 			continue;
   2679 		}
   2680 
   2681 		/* hdfd is the Atari/Hades floppy driver */
   2682 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2683 			continue;
   2684 		}
   2685 		/* fdisa is the Atari/Milan floppy driver */
   2686 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
   2687 			continue;
   2688 		}
   2689 
   2690 		/* need to find the device_name_to_block_device_major stuff */
   2691 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2692 
   2693 		/* get a vnode for the raw partition of this disk */
   2694 
   2695 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2696 		if (bdevvp(dev, &vp))
   2697 			panic("RAID can't alloc vnode");
   2698 
   2699 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2700 
   2701 		if (error) {
   2702 			/* "Who cares."  Continue looking
   2703 			   for something that exists*/
   2704 			vput(vp);
   2705 			continue;
   2706 		}
   2707 
   2708 		/* Ok, the disk exists.  Go get the disklabel. */
   2709 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2710 				  FREAD, NOCRED, 0);
   2711 		if (error) {
   2712 			/*
   2713 			 * XXX can't happen - open() would
   2714 			 * have errored out (or faked up one)
   2715 			 */
   2716 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2717 			       dv->dv_xname, 'a' + RAW_PART, error);
   2718 		}
   2719 
   2720 		/* don't need this any more.  We'll allocate it again
   2721 		   a little later if we really do... */
   2722 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2723 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2724 		vput(vp);
   2725 
   2726 		for (i=0; i < label.d_npartitions; i++) {
   2727 			/* We only support partitions marked as RAID */
   2728 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2729 				continue;
   2730 
   2731 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2732 			if (bdevvp(dev, &vp))
   2733 				panic("RAID can't alloc vnode");
   2734 
   2735 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2736 			if (error) {
   2737 				/* Whatever... */
   2738 				vput(vp);
   2739 				continue;
   2740 			}
   2741 
   2742 			good_one = 0;
   2743 
   2744 			clabel = (RF_ComponentLabel_t *)
   2745 				malloc(sizeof(RF_ComponentLabel_t),
   2746 				       M_RAIDFRAME, M_NOWAIT);
   2747 			if (clabel == NULL) {
   2748 				/* XXX CLEANUP HERE */
   2749 				printf("RAID auto config: out of memory!\n");
   2750 				return(NULL); /* XXX probably should panic? */
   2751 			}
   2752 
   2753 			if (!raidread_component_label(dev, vp, clabel)) {
   2754 				/* Got the label.  Does it look reasonable? */
   2755 				if (rf_reasonable_label(clabel) &&
   2756 				    (clabel->partitionSize <=
   2757 				     label.d_partitions[i].p_size)) {
   2758 #if DEBUG
   2759 					printf("Component on: %s%c: %d\n",
   2760 					       dv->dv_xname, 'a'+i,
   2761 					       label.d_partitions[i].p_size);
   2762 					rf_print_component_label(clabel);
   2763 #endif
   2764 					/* if it's reasonable, add it,
   2765 					   else ignore it. */
   2766 					ac = (RF_AutoConfig_t *)
   2767 						malloc(sizeof(RF_AutoConfig_t),
   2768 						       M_RAIDFRAME,
   2769 						       M_NOWAIT);
   2770 					if (ac == NULL) {
   2771 						/* XXX should panic?? */
   2772 						return(NULL);
   2773 					}
   2774 
   2775 					sprintf(ac->devname, "%s%c",
   2776 						dv->dv_xname, 'a'+i);
   2777 					ac->dev = dev;
   2778 					ac->vp = vp;
   2779 					ac->clabel = clabel;
   2780 					ac->next = ac_list;
   2781 					ac_list = ac;
   2782 					good_one = 1;
   2783 				}
   2784 			}
   2785 			if (!good_one) {
   2786 				/* cleanup */
   2787 				free(clabel, M_RAIDFRAME);
   2788 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2789 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2790 				vput(vp);
   2791 			}
   2792 		}
   2793 	}
   2794 	return(ac_list);
   2795 }
   2796 
   2797 static int
   2798 rf_reasonable_label(clabel)
   2799 	RF_ComponentLabel_t *clabel;
   2800 {
   2801 
   2802 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2803 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2804 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2805 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2806 	    clabel->row >=0 &&
   2807 	    clabel->column >= 0 &&
   2808 	    clabel->num_rows > 0 &&
   2809 	    clabel->num_columns > 0 &&
   2810 	    clabel->row < clabel->num_rows &&
   2811 	    clabel->column < clabel->num_columns &&
   2812 	    clabel->blockSize > 0 &&
   2813 	    clabel->numBlocks > 0) {
   2814 		/* label looks reasonable enough... */
   2815 		return(1);
   2816 	}
   2817 	return(0);
   2818 }
   2819 
   2820 
#if 0
/*
 * Print the contents of a component label, one group of related
 * fields per line.  This copy is compiled out.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	const char *clean_str;
	const char *auto_str;
	const char *root_str;

	/* render the boolean-ish fields once, up front */
	clean_str = clabel->clean ? "Yes" : "No";
	auto_str = clabel->autoconfigure ? "Yes" : "No";
	root_str = clabel->root_partition ? "Yes" : "No";

	/* position and geometry */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);

	/* identity */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);

	/* state */
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status );

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);

	/* autoconfiguration settings */
	printf("   Autoconfig: %s\n", auto_str );
	printf("   Contains root partition: %s\n", root_str );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   2849 
   2850 RF_ConfigSet_t *
   2851 rf_create_auto_sets(ac_list)
   2852 	RF_AutoConfig_t *ac_list;
   2853 {
   2854 	RF_AutoConfig_t *ac;
   2855 	RF_ConfigSet_t *config_sets;
   2856 	RF_ConfigSet_t *cset;
   2857 	RF_AutoConfig_t *ac_next;
   2858 
   2859 
   2860 	config_sets = NULL;
   2861 
   2862 	/* Go through the AutoConfig list, and figure out which components
   2863 	   belong to what sets.  */
   2864 	ac = ac_list;
   2865 	while(ac!=NULL) {
   2866 		/* we're going to putz with ac->next, so save it here
   2867 		   for use at the end of the loop */
   2868 		ac_next = ac->next;
   2869 
   2870 		if (config_sets == NULL) {
   2871 			/* will need at least this one... */
   2872 			config_sets = (RF_ConfigSet_t *)
   2873 				malloc(sizeof(RF_ConfigSet_t),
   2874 				       M_RAIDFRAME, M_NOWAIT);
   2875 			if (config_sets == NULL) {
   2876 				panic("rf_create_auto_sets: No memory!\n");
   2877 			}
   2878 			/* this one is easy :) */
   2879 			config_sets->ac = ac;
   2880 			config_sets->next = NULL;
   2881 			config_sets->rootable = 0;
   2882 			ac->next = NULL;
   2883 		} else {
   2884 			/* which set does this component fit into? */
   2885 			cset = config_sets;
   2886 			while(cset!=NULL) {
   2887 				if (rf_does_it_fit(cset, ac)) {
   2888 					/* looks like it matches... */
   2889 					ac->next = cset->ac;
   2890 					cset->ac = ac;
   2891 					break;
   2892 				}
   2893 				cset = cset->next;
   2894 			}
   2895 			if (cset==NULL) {
   2896 				/* didn't find a match above... new set..*/
   2897 				cset = (RF_ConfigSet_t *)
   2898 					malloc(sizeof(RF_ConfigSet_t),
   2899 					       M_RAIDFRAME, M_NOWAIT);
   2900 				if (cset == NULL) {
   2901 					panic("rf_create_auto_sets: No memory!\n");
   2902 				}
   2903 				cset->ac = ac;
   2904 				ac->next = NULL;
   2905 				cset->next = config_sets;
   2906 				cset->rootable = 0;
   2907 				config_sets = cset;
   2908 			}
   2909 		}
   2910 		ac = ac_next;
   2911 	}
   2912 
   2913 
   2914 	return(config_sets);
   2915 }
   2916 
   2917 static int
   2918 rf_does_it_fit(cset, ac)
   2919 	RF_ConfigSet_t *cset;
   2920 	RF_AutoConfig_t *ac;
   2921 {
   2922 	RF_ComponentLabel_t *clabel1, *clabel2;
   2923 
   2924 	/* If this one matches the *first* one in the set, that's good
   2925 	   enough, since the other members of the set would have been
   2926 	   through here too... */
   2927 	/* note that we are not checking partitionSize here..
   2928 
   2929 	   Note that we are also not checking the mod_counters here.
   2930 	   If everything else matches execpt the mod_counter, that's
   2931 	   good enough for this test.  We will deal with the mod_counters
   2932 	   a little later in the autoconfiguration process.
   2933 
   2934 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2935 
   2936 	   The reason we don't check for this is that failed disks
   2937 	   will have lower modification counts.  If those disks are
   2938 	   not added to the set they used to belong to, then they will
   2939 	   form their own set, which may result in 2 different sets,
   2940 	   for example, competing to be configured at raid0, and
   2941 	   perhaps competing to be the root filesystem set.  If the
   2942 	   wrong ones get configured, or both attempt to become /,
   2943 	   weird behaviour and or serious lossage will occur.  Thus we
   2944 	   need to bring them into the fold here, and kick them out at
   2945 	   a later point.
   2946 
   2947 	*/
   2948 
   2949 	clabel1 = cset->ac->clabel;
   2950 	clabel2 = ac->clabel;
   2951 	if ((clabel1->version == clabel2->version) &&
   2952 	    (clabel1->serial_number == clabel2->serial_number) &&
   2953 	    (clabel1->num_rows == clabel2->num_rows) &&
   2954 	    (clabel1->num_columns == clabel2->num_columns) &&
   2955 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2956 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2957 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2958 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2959 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2960 	    (clabel1->blockSize == clabel2->blockSize) &&
   2961 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2962 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2963 	    (clabel1->root_partition == clabel2->root_partition) &&
   2964 	    (clabel1->last_unit == clabel2->last_unit) &&
   2965 	    (clabel1->config_order == clabel2->config_order)) {
   2966 		/* if it get's here, it almost *has* to be a match */
   2967 	} else {
   2968 		/* it's not consistent with somebody in the set..
   2969 		   punt */
   2970 		return(0);
   2971 	}
   2972 	/* all was fine.. it must fit... */
   2973 	return(1);
   2974 }
   2975 
   2976 int
   2977 rf_have_enough_components(cset)
   2978 	RF_ConfigSet_t *cset;
   2979 {
   2980 	RF_AutoConfig_t *ac;
   2981 	RF_AutoConfig_t *auto_config;
   2982 	RF_ComponentLabel_t *clabel;
   2983 	int r,c;
   2984 	int num_rows;
   2985 	int num_cols;
   2986 	int num_missing;
   2987 	int mod_counter;
   2988 	int mod_counter_found;
   2989 	int even_pair_failed;
   2990 	char parity_type;
   2991 
   2992 
   2993 	/* check to see that we have enough 'live' components
   2994 	   of this set.  If so, we can configure it if necessary */
   2995 
   2996 	num_rows = cset->ac->clabel->num_rows;
   2997 	num_cols = cset->ac->clabel->num_columns;
   2998 	parity_type = cset->ac->clabel->parityConfig;
   2999 
   3000 	/* XXX Check for duplicate components!?!?!? */
   3001 
   3002 	/* Determine what the mod_counter is supposed to be for this set. */
   3003 
   3004 	mod_counter_found = 0;
   3005 	mod_counter = 0;
   3006 	ac = cset->ac;
   3007 	while(ac!=NULL) {
   3008 		if (mod_counter_found==0) {
   3009 			mod_counter = ac->clabel->mod_counter;
   3010 			mod_counter_found = 1;
   3011 		} else {
   3012 			if (ac->clabel->mod_counter > mod_counter) {
   3013 				mod_counter = ac->clabel->mod_counter;
   3014 			}
   3015 		}
   3016 		ac = ac->next;
   3017 	}
   3018 
   3019 	num_missing = 0;
   3020 	auto_config = cset->ac;
   3021 
   3022 	for(r=0; r<num_rows; r++) {
   3023 		even_pair_failed = 0;
   3024 		for(c=0; c<num_cols; c++) {
   3025 			ac = auto_config;
   3026 			while(ac!=NULL) {
   3027 				if ((ac->clabel->row == r) &&
   3028 				    (ac->clabel->column == c) &&
   3029 				    (ac->clabel->mod_counter == mod_counter)) {
   3030 					/* it's this one... */
   3031 #if DEBUG
   3032 					printf("Found: %s at %d,%d\n",
   3033 					       ac->devname,r,c);
   3034 #endif
   3035 					break;
   3036 				}
   3037 				ac=ac->next;
   3038 			}
   3039 			if (ac==NULL) {
   3040 				/* Didn't find one here! */
   3041 				/* special case for RAID 1, especially
   3042 				   where there are more than 2
   3043 				   components (where RAIDframe treats
   3044 				   things a little differently :( ) */
   3045 				if (parity_type == '1') {
   3046 					if (c%2 == 0) { /* even component */
   3047 						even_pair_failed = 1;
   3048 					} else { /* odd component.  If
   3049                                                     we're failed, and
   3050                                                     so is the even
   3051                                                     component, it's
   3052                                                     "Good Night, Charlie" */
   3053 						if (even_pair_failed == 1) {
   3054 							return(0);
   3055 						}
   3056 					}
   3057 				} else {
   3058 					/* normal accounting */
   3059 					num_missing++;
   3060 				}
   3061 			}
   3062 			if ((parity_type == '1') && (c%2 == 1)) {
   3063 				/* Just did an even component, and we didn't
   3064 				   bail.. reset the even_pair_failed flag,
   3065 				   and go on to the next component.... */
   3066 				even_pair_failed = 0;
   3067 			}
   3068 		}
   3069 	}
   3070 
   3071 	clabel = cset->ac->clabel;
   3072 
   3073 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3074 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3075 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3076 		/* XXX this needs to be made *much* more general */
   3077 		/* Too many failures */
   3078 		return(0);
   3079 	}
   3080 	/* otherwise, all is well, and we've got enough to take a kick
   3081 	   at autoconfiguring this set */
   3082 	return(1);
   3083 }
   3084 
   3085 void
   3086 rf_create_configuration(ac,config,raidPtr)
   3087 	RF_AutoConfig_t *ac;
   3088 	RF_Config_t *config;
   3089 	RF_Raid_t *raidPtr;
   3090 {
   3091 	RF_ComponentLabel_t *clabel;
   3092 	int i;
   3093 
   3094 	clabel = ac->clabel;
   3095 
   3096 	/* 1. Fill in the common stuff */
   3097 	config->numRow = clabel->num_rows;
   3098 	config->numCol = clabel->num_columns;
   3099 	config->numSpare = 0; /* XXX should this be set here? */
   3100 	config->sectPerSU = clabel->sectPerSU;
   3101 	config->SUsPerPU = clabel->SUsPerPU;
   3102 	config->SUsPerRU = clabel->SUsPerRU;
   3103 	config->parityConfig = clabel->parityConfig;
   3104 	/* XXX... */
   3105 	strcpy(config->diskQueueType,"fifo");
   3106 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3107 	config->layoutSpecificSize = 0; /* XXX ?? */
   3108 
   3109 	while(ac!=NULL) {
   3110 		/* row/col values will be in range due to the checks
   3111 		   in reasonable_label() */
   3112 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3113 		       ac->devname);
   3114 		ac = ac->next;
   3115 	}
   3116 
   3117 	for(i=0;i<RF_MAXDBGV;i++) {
   3118 		config->debugVars[i][0] = NULL;
   3119 	}
   3120 }
   3121 
   3122 int
   3123 rf_set_autoconfig(raidPtr, new_value)
   3124 	RF_Raid_t *raidPtr;
   3125 	int new_value;
   3126 {
   3127 	RF_ComponentLabel_t clabel;
   3128 	struct vnode *vp;
   3129 	dev_t dev;
   3130 	int row, column;
   3131 
   3132 	raidPtr->autoconfigure = new_value;
   3133 	for(row=0; row<raidPtr->numRow; row++) {
   3134 		for(column=0; column<raidPtr->numCol; column++) {
   3135 			if (raidPtr->Disks[row][column].status ==
   3136 			    rf_ds_optimal) {
   3137 				dev = raidPtr->Disks[row][column].dev;
   3138 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3139 				raidread_component_label(dev, vp, &clabel);
   3140 				clabel.autoconfigure = new_value;
   3141 				raidwrite_component_label(dev, vp, &clabel);
   3142 			}
   3143 		}
   3144 	}
   3145 	return(new_value);
   3146 }
   3147 
   3148 int
   3149 rf_set_rootpartition(raidPtr, new_value)
   3150 	RF_Raid_t *raidPtr;
   3151 	int new_value;
   3152 {
   3153 	RF_ComponentLabel_t clabel;
   3154 	struct vnode *vp;
   3155 	dev_t dev;
   3156 	int row, column;
   3157 
   3158 	raidPtr->root_partition = new_value;
   3159 	for(row=0; row<raidPtr->numRow; row++) {
   3160 		for(column=0; column<raidPtr->numCol; column++) {
   3161 			if (raidPtr->Disks[row][column].status ==
   3162 			    rf_ds_optimal) {
   3163 				dev = raidPtr->Disks[row][column].dev;
   3164 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3165 				raidread_component_label(dev, vp, &clabel);
   3166 				clabel.root_partition = new_value;
   3167 				raidwrite_component_label(dev, vp, &clabel);
   3168 			}
   3169 		}
   3170 	}
   3171 	return(new_value);
   3172 }
   3173 
   3174 void
   3175 rf_release_all_vps(cset)
   3176 	RF_ConfigSet_t *cset;
   3177 {
   3178 	RF_AutoConfig_t *ac;
   3179 
   3180 	ac = cset->ac;
   3181 	while(ac!=NULL) {
   3182 		/* Close the vp, and give it back */
   3183 		if (ac->vp) {
   3184 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3185 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3186 			vput(ac->vp);
   3187 			ac->vp = NULL;
   3188 		}
   3189 		ac = ac->next;
   3190 	}
   3191 }
   3192 
   3193 
   3194 void
   3195 rf_cleanup_config_set(cset)
   3196 	RF_ConfigSet_t *cset;
   3197 {
   3198 	RF_AutoConfig_t *ac;
   3199 	RF_AutoConfig_t *next_ac;
   3200 
   3201 	ac = cset->ac;
   3202 	while(ac!=NULL) {
   3203 		next_ac = ac->next;
   3204 		/* nuke the label */
   3205 		free(ac->clabel, M_RAIDFRAME);
   3206 		/* cleanup the config structure */
   3207 		free(ac, M_RAIDFRAME);
   3208 		/* "next.." */
   3209 		ac = next_ac;
   3210 	}
   3211 	/* and, finally, nuke the config set */
   3212 	free(cset, M_RAIDFRAME);
   3213 }
   3214 
   3215 
   3216 void
   3217 raid_init_component_label(raidPtr, clabel)
   3218 	RF_Raid_t *raidPtr;
   3219 	RF_ComponentLabel_t *clabel;
   3220 {
   3221 	/* current version number */
   3222 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3223 	clabel->serial_number = raidPtr->serial_number;
   3224 	clabel->mod_counter = raidPtr->mod_counter;
   3225 	clabel->num_rows = raidPtr->numRow;
   3226 	clabel->num_columns = raidPtr->numCol;
   3227 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3228 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3229 
   3230 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3231 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3232 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3233 
   3234 	clabel->blockSize = raidPtr->bytesPerSector;
   3235 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3236 
   3237 	/* XXX not portable */
   3238 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3239 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3240 	clabel->autoconfigure = raidPtr->autoconfigure;
   3241 	clabel->root_partition = raidPtr->root_partition;
   3242 	clabel->last_unit = raidPtr->raidid;
   3243 	clabel->config_order = raidPtr->config_order;
   3244 }
   3245 
   3246 int
   3247 rf_auto_config_set(cset,unit)
   3248 	RF_ConfigSet_t *cset;
   3249 	int *unit;
   3250 {
   3251 	RF_Raid_t *raidPtr;
   3252 	RF_Config_t *config;
   3253 	int raidID;
   3254 	int retcode;
   3255 
   3256 #if DEBUG
   3257 	printf("RAID autoconfigure\n");
   3258 #endif
   3259 
   3260 	retcode = 0;
   3261 	*unit = -1;
   3262 
   3263 	/* 1. Create a config structure */
   3264 
   3265 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3266 				       M_RAIDFRAME,
   3267 				       M_NOWAIT);
   3268 	if (config==NULL) {
   3269 		printf("Out of mem!?!?\n");
   3270 				/* XXX do something more intelligent here. */
   3271 		return(1);
   3272 	}
   3273 
   3274 	memset(config, 0, sizeof(RF_Config_t));
   3275 
   3276 	/*
   3277 	   2. Figure out what RAID ID this one is supposed to live at
   3278 	   See if we can get the same RAID dev that it was configured
   3279 	   on last time..
   3280 	*/
   3281 
   3282 	raidID = cset->ac->clabel->last_unit;
   3283 	if ((raidID < 0) || (raidID >= numraid)) {
   3284 		/* let's not wander off into lala land. */
   3285 		raidID = numraid - 1;
   3286 	}
   3287 	if (raidPtrs[raidID]->valid != 0) {
   3288 
   3289 		/*
   3290 		   Nope... Go looking for an alternative...
   3291 		   Start high so we don't immediately use raid0 if that's
   3292 		   not taken.
   3293 		*/
   3294 
   3295 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3296 			if (raidPtrs[raidID]->valid == 0) {
   3297 				/* can use this one! */
   3298 				break;
   3299 			}
   3300 		}
   3301 	}
   3302 
   3303 	if (raidID < 0) {
   3304 		/* punt... */
   3305 		printf("Unable to auto configure this set!\n");
   3306 		printf("(Out of RAID devs!)\n");
   3307 		return(1);
   3308 	}
   3309 
   3310 #if DEBUG
   3311 	printf("Configuring raid%d:\n",raidID);
   3312 #endif
   3313 
   3314 	raidPtr = raidPtrs[raidID];
   3315 
   3316 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3317 	raidPtr->raidid = raidID;
   3318 	raidPtr->openings = RAIDOUTSTANDING;
   3319 
   3320 	/* 3. Build the configuration structure */
   3321 	rf_create_configuration(cset->ac, config, raidPtr);
   3322 
   3323 	/* 4. Do the configuration */
   3324 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3325 
   3326 	if (retcode == 0) {
   3327 
   3328 		raidinit(raidPtrs[raidID]);
   3329 
   3330 		rf_markalldirty(raidPtrs[raidID]);
   3331 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3332 		if (cset->ac->clabel->root_partition==1) {
   3333 			/* everything configured just fine.  Make a note
   3334 			   that this set is eligible to be root. */
   3335 			cset->rootable = 1;
   3336 			/* XXX do this here? */
   3337 			raidPtrs[raidID]->root_partition = 1;
   3338 		}
   3339 	}
   3340 
   3341 	/* 5. Cleanup */
   3342 	free(config, M_RAIDFRAME);
   3343 
   3344 	*unit = raidID;
   3345 	return(retcode);
   3346 }
   3347 
   3348 void
   3349 rf_disk_unbusy(desc)
   3350 	RF_RaidAccessDesc_t *desc;
   3351 {
   3352 	struct buf *bp;
   3353 
   3354 	bp = (struct buf *)desc->bp;
   3355 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3356 			    (bp->b_bcount - bp->b_resid));
   3357 }
   3358