Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.104.2.11
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.11 2002/06/24 22:10:12 nathanw Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.11 2002/06/24 22:10:12 nathanw Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/lwp.h>
    123 #include <sys/proc.h>
    124 #include <sys/queue.h>
    125 #include <sys/disk.h>
    126 #include <sys/device.h>
    127 #include <sys/stat.h>
    128 #include <sys/ioctl.h>
    129 #include <sys/fcntl.h>
    130 #include <sys/systm.h>
    131 #include <sys/namei.h>
    132 #include <sys/vnode.h>
    133 #include <sys/disklabel.h>
    134 #include <sys/conf.h>
    135 #include <sys/lock.h>
    136 #include <sys/buf.h>
    137 #include <sys/user.h>
    138 #include <sys/reboot.h>
    139 
    140 #include <dev/raidframe/raidframevar.h>
    141 #include <dev/raidframe/raidframeio.h>
    142 #include "raid.h"
    143 #include "opt_raid_autoconfig.h"
    144 #include "rf_raid.h"
    145 #include "rf_copyback.h"
    146 #include "rf_dag.h"
    147 #include "rf_dagflags.h"
    148 #include "rf_desc.h"
    149 #include "rf_diskqueue.h"
    150 #include "rf_acctrace.h"
    151 #include "rf_etimer.h"
    152 #include "rf_general.h"
    153 #include "rf_debugMem.h"
    154 #include "rf_kintf.h"
    155 #include "rf_options.h"
    156 #include "rf_driver.h"
    157 #include "rf_parityscan.h"
    158 #include "rf_debugprint.h"
    159 #include "rf_threadstuff.h"
    160 
/* Debug verbosity for this file; db1_printf() output appears when it is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list.  Both
 * variants expand to a single do { } while (0) statement so that the macro
 * behaves like an ordinary function call followed by ';' -- in particular
 * it can be used as the unbraced body of an if that has an else branch.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    168 
    169 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    170 
    171 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    172 
    173 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    174 						 * spare table */
    175 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    176 						 * installation process */
    177 
    178 /* prototypes */
    179 static void KernelWakeupFunc(struct buf * bp);
    180 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    181 		   dev_t dev, RF_SectorNum_t startSect,
    182 		   RF_SectorCount_t numSect, caddr_t buf,
    183 		   void (*cbFunc) (struct buf *), void *cbArg,
    184 		   int logBytesPerSector, struct proc * b_proc);
    185 static void raidinit(RF_Raid_t *);
    186 
    187 void raidattach(int);
    188 int raidsize(dev_t);
    189 int raidopen(dev_t, int, int, struct proc *);
    190 int raidclose(dev_t, int, int, struct proc *);
    191 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
    192 int raidwrite(dev_t, struct uio *, int);
    193 int raidread(dev_t, struct uio *, int);
    194 void raidstrategy(struct buf *);
    195 int raiddump(dev_t, daddr_t, caddr_t, size_t);
    196 
    197 /*
    198  * Pilfered from ccd.c
    199  */
    200 
    201 struct raidbuf {
    202 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    203 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    204 	int     rf_flags;	/* misc. flags */
    205 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    206 };
    207 
    208 /* component buffer pool */
    209 struct pool raidframe_cbufpool;
    210 
    211 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
    212 #define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    213 
    214 /* XXX Not sure if the following should be replacing the raidPtrs above,
    215    or if it should be used in conjunction with that...
    216 */
    217 
    218 struct raid_softc {
    219 	int     sc_flags;	/* flags */
    220 	int     sc_cflags;	/* configuration flags */
    221 	size_t  sc_size;        /* size of the raid device */
    222 	char    sc_xname[20];	/* XXX external name */
    223 	struct disk sc_dkdev;	/* generic disk device info */
    224 	struct buf_queue buf_queue;	/* used for the device queue */
    225 };
    226 /* sc_flags */
    227 #define RAIDF_INITED	0x01	/* unit has been initialized */
    228 #define RAIDF_WLABEL	0x02	/* label area is writable */
    229 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    230 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    231 #define RAIDF_LOCKED	0x80	/* unit is locked */
    232 
    233 #define	raidunit(x)	DISKUNIT(x)
    234 int numraid = 0;
    235 
    236 /*
    237  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    238  * Be aware that large numbers can allow the driver to consume a lot of
    239  * kernel memory, especially on writes, and in degraded mode reads.
    240  *
    241  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    242  * a single 64K write will typically require 64K for the old data,
    243  * 64K for the old parity, and 64K for the new parity, for a total
    244  * of 192K (if the parity buffer is not re-used immediately).
    245  * Even if it is used immediately, that's still 128K, which when multiplied
    246  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    247  *
    248  * Now in degraded mode, for example, a 64K read on the above setup may
    249  * require data reconstruction, which will require *all* of the 4 remaining
    250  * disks to participate -- 4 * 32K/disk == 128K again.
    251  */
    252 
    253 #ifndef RAIDOUTSTANDING
    254 #define RAIDOUTSTANDING   6
    255 #endif
    256 
    257 #define RAIDLABELDEV(dev)	\
    258 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    259 
    260 /* declared here, and made public, for the benefit of KVM stuff.. */
    261 struct raid_softc *raid_softc;
    262 
    263 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    264 				     struct disklabel *);
    265 static void raidgetdisklabel(dev_t);
    266 static void raidmakedisklabel(struct raid_softc *);
    267 
    268 static int raidlock(struct raid_softc *);
    269 static void raidunlock(struct raid_softc *);
    270 
    271 static void rf_markalldirty(RF_Raid_t *);
    272 void rf_mountroot_hook(struct device *);
    273 
    274 struct device *raidrootdev;
    275 
    276 void rf_ReconThread(struct rf_recon_req *);
    277 /* XXX what I want is: */
    278 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    280 void rf_CopybackThread(RF_Raid_t *raidPtr);
    281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    282 void rf_buildroothack(void *);
    283 
    284 RF_AutoConfig_t *rf_find_raid_components(void);
    285 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    286 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    287 static int rf_reasonable_label(RF_ComponentLabel_t *);
    288 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    289 int rf_set_autoconfig(RF_Raid_t *, int);
    290 int rf_set_rootpartition(RF_Raid_t *, int);
    291 void rf_release_all_vps(RF_ConfigSet_t *);
    292 void rf_cleanup_config_set(RF_ConfigSet_t *);
    293 int rf_have_enough_components(RF_ConfigSet_t *);
    294 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    295 
    296 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    297 				  allow autoconfig to take place.
    298 			          Note that this is overridden by having
    299 			          RAID_AUTOCONFIG as an option in the
    300 			          kernel config file.  */
    301 
    302 void
    303 raidattach(num)
    304 	int     num;
    305 {
    306 	int raidID;
    307 	int i, rc;
    308 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    309 	RF_ConfigSet_t *config_sets;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	numraid = num;
    324 
    325 	/* Make some space for requested number of units... */
    326 
    327 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    328 	if (raidPtrs == NULL) {
    329 		panic("raidPtrs is NULL!!\n");
    330 	}
    331 
    332 	/* Initialize the component buffer pool. */
    333 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    334 	    0, 0, "raidpl", NULL);
    335 
    336 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    337 	if (rc) {
    338 		RF_PANIC();
    339 	}
    340 
    341 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    342 
    343 	for (i = 0; i < num; i++)
    344 		raidPtrs[i] = NULL;
    345 	rc = rf_BootRaidframe();
    346 	if (rc == 0)
    347 		printf("Kernelized RAIDframe activated\n");
    348 	else
    349 		panic("Serious error booting RAID!!\n");
    350 
    351 	/* put together some datastructures like the CCD device does.. This
    352 	 * lets us lock the device and what-not when it gets opened. */
    353 
    354 	raid_softc = (struct raid_softc *)
    355 		malloc(num * sizeof(struct raid_softc),
    356 		       M_RAIDFRAME, M_NOWAIT);
    357 	if (raid_softc == NULL) {
    358 		printf("WARNING: no memory for RAIDframe driver\n");
    359 		return;
    360 	}
    361 
    362 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    363 
    364 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    365 					      M_RAIDFRAME, M_NOWAIT);
    366 	if (raidrootdev == NULL) {
    367 		panic("No memory for RAIDframe driver!!?!?!\n");
    368 	}
    369 
    370 	for (raidID = 0; raidID < num; raidID++) {
    371 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    372 
    373 		raidrootdev[raidID].dv_class  = DV_DISK;
    374 		raidrootdev[raidID].dv_cfdata = NULL;
    375 		raidrootdev[raidID].dv_unit   = raidID;
    376 		raidrootdev[raidID].dv_parent = NULL;
    377 		raidrootdev[raidID].dv_flags  = 0;
    378 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    379 
    380 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    381 			  (RF_Raid_t *));
    382 		if (raidPtrs[raidID] == NULL) {
    383 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    384 			numraid = raidID;
    385 			return;
    386 		}
    387 	}
    388 
    389 #ifdef RAID_AUTOCONFIG
    390 	raidautoconfig = 1;
    391 #endif
    392 
    393 if (raidautoconfig) {
    394 	/* 1. locate all RAID components on the system */
    395 
    396 #if DEBUG
    397 	printf("Searching for raid components...\n");
    398 #endif
    399 	ac_list = rf_find_raid_components();
    400 
    401 	/* 2. sort them into their respective sets */
    402 
    403 	config_sets = rf_create_auto_sets(ac_list);
    404 
    405 	/* 3. evaluate each set and configure the valid ones
    406 	   This gets done in rf_buildroothack() */
    407 
    408 	/* schedule the creation of the thread to do the
    409 	   "/ on RAID" stuff */
    410 
    411 	kthread_create(rf_buildroothack,config_sets);
    412 
    413 #if 0
    414 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    415 #endif
    416 }
    417 
    418 }
    419 
    420 void
    421 rf_buildroothack(arg)
    422 	void *arg;
    423 {
    424 	RF_ConfigSet_t *config_sets = arg;
    425 	RF_ConfigSet_t *cset;
    426 	RF_ConfigSet_t *next_cset;
    427 	int retcode;
    428 	int raidID;
    429 	int rootID;
    430 	int num_root;
    431 
    432 	rootID = 0;
    433 	num_root = 0;
    434 	cset = config_sets;
    435 	while(cset != NULL ) {
    436 		next_cset = cset->next;
    437 		if (rf_have_enough_components(cset) &&
    438 		    cset->ac->clabel->autoconfigure==1) {
    439 			retcode = rf_auto_config_set(cset,&raidID);
    440 			if (!retcode) {
    441 				if (cset->rootable) {
    442 					rootID = raidID;
    443 					num_root++;
    444 				}
    445 			} else {
    446 				/* The autoconfig didn't work :( */
    447 #if DEBUG
    448 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    449 #endif
    450 				rf_release_all_vps(cset);
    451 			}
    452 		} else {
    453 			/* we're not autoconfiguring this set...
    454 			   release the associated resources */
    455 			rf_release_all_vps(cset);
    456 		}
    457 		/* cleanup */
    458 		rf_cleanup_config_set(cset);
    459 		cset = next_cset;
    460 	}
    461 	if (boothowto & RB_ASKNAME) {
    462 		/* We don't auto-config... */
    463 	} else {
    464 		/* They didn't ask, and we found something bootable... */
    465 
    466 		if (num_root == 1) {
    467 			booted_device = &raidrootdev[rootID];
    468 		} else if (num_root > 1) {
    469 			/* we can't guess.. require the user to answer... */
    470 			boothowto |= RB_ASKNAME;
    471 		}
    472 	}
    473 }
    474 
    475 
    476 int
    477 raidsize(dev)
    478 	dev_t   dev;
    479 {
    480 	struct raid_softc *rs;
    481 	struct disklabel *lp;
    482 	int     part, unit, omask, size;
    483 
    484 	unit = raidunit(dev);
    485 	if (unit >= numraid)
    486 		return (-1);
    487 	rs = &raid_softc[unit];
    488 
    489 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    490 		return (-1);
    491 
    492 	part = DISKPART(dev);
    493 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    494 	lp = rs->sc_dkdev.dk_label;
    495 
    496 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    497 		return (-1);
    498 
    499 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    500 		size = -1;
    501 	else
    502 		size = lp->d_partitions[part].p_size *
    503 		    (lp->d_secsize / DEV_BSIZE);
    504 
    505 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    506 		return (-1);
    507 
    508 	return (size);
    509 
    510 }
    511 
    512 int
    513 raiddump(dev, blkno, va, size)
    514 	dev_t   dev;
    515 	daddr_t blkno;
    516 	caddr_t va;
    517 	size_t  size;
    518 {
    519 	/* Not implemented. */
    520 	return ENXIO;
    521 }
    522 /* ARGSUSED */
    523 int
    524 raidopen(dev, flags, fmt, p)
    525 	dev_t   dev;
    526 	int     flags, fmt;
    527 	struct proc *p;
    528 {
    529 	int     unit = raidunit(dev);
    530 	struct raid_softc *rs;
    531 	struct disklabel *lp;
    532 	int     part, pmask;
    533 	int     error = 0;
    534 
    535 	if (unit >= numraid)
    536 		return (ENXIO);
    537 	rs = &raid_softc[unit];
    538 
    539 	if ((error = raidlock(rs)) != 0)
    540 		return (error);
    541 	lp = rs->sc_dkdev.dk_label;
    542 
    543 	part = DISKPART(dev);
    544 	pmask = (1 << part);
    545 
    546 	db1_printf(("Opening raid device number: %d partition: %d\n",
    547 		unit, part));
    548 
    549 
    550 	if ((rs->sc_flags & RAIDF_INITED) &&
    551 	    (rs->sc_dkdev.dk_openmask == 0))
    552 		raidgetdisklabel(dev);
    553 
    554 	/* make sure that this partition exists */
    555 
    556 	if (part != RAW_PART) {
    557 		db1_printf(("Not a raw partition..\n"));
    558 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    559 		    ((part >= lp->d_npartitions) ||
    560 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    561 			error = ENXIO;
    562 			raidunlock(rs);
    563 			db1_printf(("Bailing out...\n"));
    564 			return (error);
    565 		}
    566 	}
    567 	/* Prevent this unit from being unconfigured while open. */
    568 	switch (fmt) {
    569 	case S_IFCHR:
    570 		rs->sc_dkdev.dk_copenmask |= pmask;
    571 		break;
    572 
    573 	case S_IFBLK:
    574 		rs->sc_dkdev.dk_bopenmask |= pmask;
    575 		break;
    576 	}
    577 
    578 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    579 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    580 		/* First one... mark things as dirty... Note that we *MUST*
    581 		 have done a configure before this.  I DO NOT WANT TO BE
    582 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    583 		 THAT THEY BELONG TOGETHER!!!!! */
    584 		/* XXX should check to see if we're only open for reading
    585 		   here... If so, we needn't do this, but then need some
    586 		   other way of keeping track of what's happened.. */
    587 
    588 		rf_markalldirty( raidPtrs[unit] );
    589 	}
    590 
    591 
    592 	rs->sc_dkdev.dk_openmask =
    593 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    594 
    595 	raidunlock(rs);
    596 
    597 	return (error);
    598 
    599 
    600 }
    601 /* ARGSUSED */
    602 int
    603 raidclose(dev, flags, fmt, p)
    604 	dev_t   dev;
    605 	int     flags, fmt;
    606 	struct proc *p;
    607 {
    608 	int     unit = raidunit(dev);
    609 	struct raid_softc *rs;
    610 	int     error = 0;
    611 	int     part;
    612 
    613 	if (unit >= numraid)
    614 		return (ENXIO);
    615 	rs = &raid_softc[unit];
    616 
    617 	if ((error = raidlock(rs)) != 0)
    618 		return (error);
    619 
    620 	part = DISKPART(dev);
    621 
    622 	/* ...that much closer to allowing unconfiguration... */
    623 	switch (fmt) {
    624 	case S_IFCHR:
    625 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    626 		break;
    627 
    628 	case S_IFBLK:
    629 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    630 		break;
    631 	}
    632 	rs->sc_dkdev.dk_openmask =
    633 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    634 
    635 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    636 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    637 		/* Last one... device is not unconfigured yet.
    638 		   Device shutdown has taken care of setting the
    639 		   clean bits if RAIDF_INITED is not set
    640 		   mark things as clean... */
    641 #if 0
    642 		printf("Last one on raid%d.  Updating status.\n",unit);
    643 #endif
    644 		rf_update_component_labels(raidPtrs[unit],
    645 						 RF_FINAL_COMPONENT_UPDATE);
    646 		if (doing_shutdown) {
    647 			/* last one, and we're going down, so
    648 			   lights out for this RAID set too. */
    649 			error = rf_Shutdown(raidPtrs[unit]);
    650 
    651 			/* It's no longer initialized... */
    652 			rs->sc_flags &= ~RAIDF_INITED;
    653 
    654 			/* Detach the disk. */
    655 			disk_detach(&rs->sc_dkdev);
    656 		}
    657 	}
    658 
    659 	raidunlock(rs);
    660 	return (0);
    661 
    662 }
    663 
    664 void
    665 raidstrategy(bp)
    666 	struct buf *bp;
    667 {
    668 	int s;
    669 
    670 	unsigned int raidID = raidunit(bp->b_dev);
    671 	RF_Raid_t *raidPtr;
    672 	struct raid_softc *rs = &raid_softc[raidID];
    673 	struct disklabel *lp;
    674 	int     wlabel;
    675 
    676 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    677 		bp->b_error = ENXIO;
    678 		bp->b_flags |= B_ERROR;
    679 		bp->b_resid = bp->b_bcount;
    680 		biodone(bp);
    681 		return;
    682 	}
    683 	if (raidID >= numraid || !raidPtrs[raidID]) {
    684 		bp->b_error = ENODEV;
    685 		bp->b_flags |= B_ERROR;
    686 		bp->b_resid = bp->b_bcount;
    687 		biodone(bp);
    688 		return;
    689 	}
    690 	raidPtr = raidPtrs[raidID];
    691 	if (!raidPtr->valid) {
    692 		bp->b_error = ENODEV;
    693 		bp->b_flags |= B_ERROR;
    694 		bp->b_resid = bp->b_bcount;
    695 		biodone(bp);
    696 		return;
    697 	}
    698 	if (bp->b_bcount == 0) {
    699 		db1_printf(("b_bcount is zero..\n"));
    700 		biodone(bp);
    701 		return;
    702 	}
    703 	lp = rs->sc_dkdev.dk_label;
    704 
    705 	/*
    706 	 * Do bounds checking and adjust transfer.  If there's an
    707 	 * error, the bounds check will flag that for us.
    708 	 */
    709 
    710 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    711 	if (DISKPART(bp->b_dev) != RAW_PART)
    712 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    713 			db1_printf(("Bounds check failed!!:%d %d\n",
    714 				(int) bp->b_blkno, (int) wlabel));
    715 			biodone(bp);
    716 			return;
    717 		}
    718 	s = splbio();
    719 
    720 	bp->b_resid = 0;
    721 
    722 	/* stuff it onto our queue */
    723 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    724 
    725 	raidstart(raidPtrs[raidID]);
    726 
    727 	splx(s);
    728 }
    729 /* ARGSUSED */
    730 int
    731 raidread(dev, uio, flags)
    732 	dev_t   dev;
    733 	struct uio *uio;
    734 	int     flags;
    735 {
    736 	int     unit = raidunit(dev);
    737 	struct raid_softc *rs;
    738 	int     part;
    739 
    740 	if (unit >= numraid)
    741 		return (ENXIO);
    742 	rs = &raid_softc[unit];
    743 
    744 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    745 		return (ENXIO);
    746 	part = DISKPART(dev);
    747 
    748 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    749 
    750 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    751 
    752 }
    753 /* ARGSUSED */
    754 int
    755 raidwrite(dev, uio, flags)
    756 	dev_t   dev;
    757 	struct uio *uio;
    758 	int     flags;
    759 {
    760 	int     unit = raidunit(dev);
    761 	struct raid_softc *rs;
    762 
    763 	if (unit >= numraid)
    764 		return (ENXIO);
    765 	rs = &raid_softc[unit];
    766 
    767 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    768 		return (ENXIO);
    769 	db1_printf(("raidwrite\n"));
    770 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    771 
    772 }
    773 
    774 int
    775 raidioctl(dev, cmd, data, flag, p)
    776 	dev_t   dev;
    777 	u_long  cmd;
    778 	caddr_t data;
    779 	int     flag;
    780 	struct proc *p;
    781 {
    782 	int     unit = raidunit(dev);
    783 	int     error = 0;
    784 	int     part, pmask;
    785 	struct raid_softc *rs;
    786 	RF_Config_t *k_cfg, *u_cfg;
    787 	RF_Raid_t *raidPtr;
    788 	RF_RaidDisk_t *diskPtr;
    789 	RF_AccTotals_t *totals;
    790 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    791 	u_char *specific_buf;
    792 	int retcode = 0;
    793 	int row;
    794 	int column;
    795 	struct rf_recon_req *rrcopy, *rr;
    796 	RF_ComponentLabel_t *clabel;
    797 	RF_ComponentLabel_t ci_label;
    798 	RF_ComponentLabel_t **clabel_ptr;
    799 	RF_SingleComponent_t *sparePtr,*componentPtr;
    800 	RF_SingleComponent_t hot_spare;
    801 	RF_SingleComponent_t component;
    802 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    803 	int i, j, d;
    804 #ifdef __HAVE_OLD_DISKLABEL
    805 	struct disklabel newlabel;
    806 #endif
    807 
    808 	if (unit >= numraid)
    809 		return (ENXIO);
    810 	rs = &raid_softc[unit];
    811 	raidPtr = raidPtrs[unit];
    812 
    813 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    814 		(int) DISKPART(dev), (int) unit, (int) cmd));
    815 
    816 	/* Must be open for writes for these commands... */
    817 	switch (cmd) {
    818 	case DIOCSDINFO:
    819 	case DIOCWDINFO:
    820 #ifdef __HAVE_OLD_DISKLABEL
    821 	case ODIOCWDINFO:
    822 	case ODIOCSDINFO:
    823 #endif
    824 	case DIOCWLABEL:
    825 		if ((flag & FWRITE) == 0)
    826 			return (EBADF);
    827 	}
    828 
    829 	/* Must be initialized for these... */
    830 	switch (cmd) {
    831 	case DIOCGDINFO:
    832 	case DIOCSDINFO:
    833 	case DIOCWDINFO:
    834 #ifdef __HAVE_OLD_DISKLABEL
    835 	case ODIOCGDINFO:
    836 	case ODIOCWDINFO:
    837 	case ODIOCSDINFO:
    838 	case ODIOCGDEFLABEL:
    839 #endif
    840 	case DIOCGPART:
    841 	case DIOCWLABEL:
    842 	case DIOCGDEFLABEL:
    843 	case RAIDFRAME_SHUTDOWN:
    844 	case RAIDFRAME_REWRITEPARITY:
    845 	case RAIDFRAME_GET_INFO:
    846 	case RAIDFRAME_RESET_ACCTOTALS:
    847 	case RAIDFRAME_GET_ACCTOTALS:
    848 	case RAIDFRAME_KEEP_ACCTOTALS:
    849 	case RAIDFRAME_GET_SIZE:
    850 	case RAIDFRAME_FAIL_DISK:
    851 	case RAIDFRAME_COPYBACK:
    852 	case RAIDFRAME_CHECK_RECON_STATUS:
    853 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    854 	case RAIDFRAME_GET_COMPONENT_LABEL:
    855 	case RAIDFRAME_SET_COMPONENT_LABEL:
    856 	case RAIDFRAME_ADD_HOT_SPARE:
    857 	case RAIDFRAME_REMOVE_HOT_SPARE:
    858 	case RAIDFRAME_INIT_LABELS:
    859 	case RAIDFRAME_REBUILD_IN_PLACE:
    860 	case RAIDFRAME_CHECK_PARITY:
    861 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    862 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    863 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    864 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    865 	case RAIDFRAME_SET_AUTOCONFIG:
    866 	case RAIDFRAME_SET_ROOT:
    867 	case RAIDFRAME_DELETE_COMPONENT:
    868 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    869 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    870 			return (ENXIO);
    871 	}
    872 
    873 	switch (cmd) {
    874 
    875 		/* configure the system */
    876 	case RAIDFRAME_CONFIGURE:
    877 
    878 		if (raidPtr->valid) {
    879 			/* There is a valid RAID set running on this unit! */
    880 			printf("raid%d: Device already configured!\n",unit);
    881 			return(EINVAL);
    882 		}
    883 
    884 		/* copy-in the configuration information */
    885 		/* data points to a pointer to the configuration structure */
    886 
    887 		u_cfg = *((RF_Config_t **) data);
    888 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    889 		if (k_cfg == NULL) {
    890 			return (ENOMEM);
    891 		}
    892 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    893 		    sizeof(RF_Config_t));
    894 		if (retcode) {
    895 			RF_Free(k_cfg, sizeof(RF_Config_t));
    896 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    897 				retcode));
    898 			return (retcode);
    899 		}
    900 		/* allocate a buffer for the layout-specific data, and copy it
    901 		 * in */
    902 		if (k_cfg->layoutSpecificSize) {
    903 			if (k_cfg->layoutSpecificSize > 10000) {
    904 				/* sanity check */
    905 				RF_Free(k_cfg, sizeof(RF_Config_t));
    906 				return (EINVAL);
    907 			}
    908 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    909 			    (u_char *));
    910 			if (specific_buf == NULL) {
    911 				RF_Free(k_cfg, sizeof(RF_Config_t));
    912 				return (ENOMEM);
    913 			}
    914 			retcode = copyin(k_cfg->layoutSpecific,
    915 			    (caddr_t) specific_buf,
    916 			    k_cfg->layoutSpecificSize);
    917 			if (retcode) {
    918 				RF_Free(k_cfg, sizeof(RF_Config_t));
    919 				RF_Free(specific_buf,
    920 					k_cfg->layoutSpecificSize);
    921 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    922 					retcode));
    923 				return (retcode);
    924 			}
    925 		} else
    926 			specific_buf = NULL;
    927 		k_cfg->layoutSpecific = specific_buf;
    928 
    929 		/* should do some kind of sanity check on the configuration.
    930 		 * Store the sum of all the bytes in the last byte? */
    931 
    932 		/* configure the system */
    933 
    934 		/*
    935 		 * Clear the entire RAID descriptor, just to make sure
    936 		 *  there is no stale data left in the case of a
    937 		 *  reconfiguration
    938 		 */
    939 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    940 		raidPtr->raidid = unit;
    941 
    942 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    943 
    944 		if (retcode == 0) {
    945 
    946 			/* allow this many simultaneous IO's to
    947 			   this RAID device */
    948 			raidPtr->openings = RAIDOUTSTANDING;
    949 
    950 			raidinit(raidPtr);
    951 			rf_markalldirty(raidPtr);
    952 		}
    953 		/* free the buffers.  No return code here. */
    954 		if (k_cfg->layoutSpecificSize) {
    955 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    956 		}
    957 		RF_Free(k_cfg, sizeof(RF_Config_t));
    958 
    959 		return (retcode);
    960 
    961 		/* shutdown the system */
    962 	case RAIDFRAME_SHUTDOWN:
    963 
    964 		if ((error = raidlock(rs)) != 0)
    965 			return (error);
    966 
    967 		/*
    968 		 * If somebody has a partition mounted, we shouldn't
    969 		 * shutdown.
    970 		 */
    971 
    972 		part = DISKPART(dev);
    973 		pmask = (1 << part);
    974 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    975 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    976 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    977 			raidunlock(rs);
    978 			return (EBUSY);
    979 		}
    980 
    981 		retcode = rf_Shutdown(raidPtr);
    982 
    983 		/* It's no longer initialized... */
    984 		rs->sc_flags &= ~RAIDF_INITED;
    985 
    986 		/* Detach the disk. */
    987 		disk_detach(&rs->sc_dkdev);
    988 
    989 		raidunlock(rs);
    990 
    991 		return (retcode);
    992 	case RAIDFRAME_GET_COMPONENT_LABEL:
    993 		clabel_ptr = (RF_ComponentLabel_t **) data;
    994 		/* need to read the component label for the disk indicated
    995 		   by row,column in clabel */
    996 
     997 		/* For practice, let's get it directly from disk, rather
    998 		   than from the in-core copy */
    999 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1000 			   (RF_ComponentLabel_t *));
   1001 		if (clabel == NULL)
   1002 			return (ENOMEM);
   1003 
   1004 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1005 
   1006 		retcode = copyin( *clabel_ptr, clabel,
   1007 				  sizeof(RF_ComponentLabel_t));
   1008 
   1009 		if (retcode) {
   1010 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1011 			return(retcode);
   1012 		}
   1013 
   1014 		row = clabel->row;
   1015 		column = clabel->column;
   1016 
   1017 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1018 		    (column < 0) || (column >= raidPtr->numCol +
   1019 				     raidPtr->numSpare)) {
   1020 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1021 			return(EINVAL);
   1022 		}
   1023 
   1024 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1025 				raidPtr->raid_cinfo[row][column].ci_vp,
   1026 				clabel );
   1027 
   1028 		retcode = copyout((caddr_t) clabel,
   1029 				  (caddr_t) *clabel_ptr,
   1030 				  sizeof(RF_ComponentLabel_t));
   1031 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1032 		return (retcode);
   1033 
   1034 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1035 		clabel = (RF_ComponentLabel_t *) data;
   1036 
   1037 		/* XXX check the label for valid stuff... */
   1038 		/* Note that some things *should not* get modified --
   1039 		   the user should be re-initing the labels instead of
   1040 		   trying to patch things.
   1041 		   */
   1042 
   1043 		printf("Got component label:\n");
   1044 		printf("Version: %d\n",clabel->version);
   1045 		printf("Serial Number: %d\n",clabel->serial_number);
   1046 		printf("Mod counter: %d\n",clabel->mod_counter);
   1047 		printf("Row: %d\n", clabel->row);
   1048 		printf("Column: %d\n", clabel->column);
   1049 		printf("Num Rows: %d\n", clabel->num_rows);
   1050 		printf("Num Columns: %d\n", clabel->num_columns);
   1051 		printf("Clean: %d\n", clabel->clean);
   1052 		printf("Status: %d\n", clabel->status);
   1053 
   1054 		row = clabel->row;
   1055 		column = clabel->column;
   1056 
   1057 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1058 		    (column < 0) || (column >= raidPtr->numCol)) {
   1059 			return(EINVAL);
   1060 		}
   1061 
   1062 		/* XXX this isn't allowed to do anything for now :-) */
   1063 
   1064 		/* XXX and before it is, we need to fill in the rest
   1065 		   of the fields!?!?!?! */
   1066 #if 0
   1067 		raidwrite_component_label(
   1068                             raidPtr->Disks[row][column].dev,
   1069 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1070 			    clabel );
   1071 #endif
   1072 		return (0);
   1073 
   1074 	case RAIDFRAME_INIT_LABELS:
   1075 		clabel = (RF_ComponentLabel_t *) data;
   1076 		/*
   1077 		   we only want the serial number from
   1078 		   the above.  We get all the rest of the information
   1079 		   from the config that was used to create this RAID
   1080 		   set.
   1081 		   */
   1082 
   1083 		raidPtr->serial_number = clabel->serial_number;
   1084 
   1085 		raid_init_component_label(raidPtr, &ci_label);
   1086 		ci_label.serial_number = clabel->serial_number;
   1087 
   1088 		for(row=0;row<raidPtr->numRow;row++) {
   1089 			ci_label.row = row;
   1090 			for(column=0;column<raidPtr->numCol;column++) {
   1091 				diskPtr = &raidPtr->Disks[row][column];
   1092 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1093 					ci_label.partitionSize = diskPtr->partitionSize;
   1094 					ci_label.column = column;
   1095 					raidwrite_component_label(
   1096 					  raidPtr->Disks[row][column].dev,
   1097 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1098 					  &ci_label );
   1099 				}
   1100 			}
   1101 		}
   1102 
   1103 		return (retcode);
   1104 	case RAIDFRAME_SET_AUTOCONFIG:
   1105 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1106 		printf("New autoconfig value is: %d\n", d);
   1107 		*(int *) data = d;
   1108 		return (retcode);
   1109 
   1110 	case RAIDFRAME_SET_ROOT:
   1111 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1112 		printf("New rootpartition value is: %d\n", d);
   1113 		*(int *) data = d;
   1114 		return (retcode);
   1115 
   1116 		/* initialize all parity */
   1117 	case RAIDFRAME_REWRITEPARITY:
   1118 
   1119 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1120 			/* Parity for RAID 0 is trivially correct */
   1121 			raidPtr->parity_good = RF_RAID_CLEAN;
   1122 			return(0);
   1123 		}
   1124 
   1125 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1126 			/* Re-write is already in progress! */
   1127 			return(EINVAL);
   1128 		}
   1129 
   1130 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1131 					   rf_RewriteParityThread,
   1132 					   raidPtr,"raid_parity");
   1133 		return (retcode);
   1134 
   1135 
   1136 	case RAIDFRAME_ADD_HOT_SPARE:
   1137 		sparePtr = (RF_SingleComponent_t *) data;
   1138 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1139 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1140 		return(retcode);
   1141 
   1142 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1143 		return(retcode);
   1144 
   1145 	case RAIDFRAME_DELETE_COMPONENT:
   1146 		componentPtr = (RF_SingleComponent_t *)data;
   1147 		memcpy( &component, componentPtr,
   1148 			sizeof(RF_SingleComponent_t));
   1149 		retcode = rf_delete_component(raidPtr, &component);
   1150 		return(retcode);
   1151 
   1152 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1153 		componentPtr = (RF_SingleComponent_t *)data;
   1154 		memcpy( &component, componentPtr,
   1155 			sizeof(RF_SingleComponent_t));
   1156 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1157 		return(retcode);
   1158 
   1159 	case RAIDFRAME_REBUILD_IN_PLACE:
   1160 
   1161 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1162 			/* Can't do this on a RAID 0!! */
   1163 			return(EINVAL);
   1164 		}
   1165 
   1166 		if (raidPtr->recon_in_progress == 1) {
   1167 			/* a reconstruct is already in progress! */
   1168 			return(EINVAL);
   1169 		}
   1170 
   1171 		componentPtr = (RF_SingleComponent_t *) data;
   1172 		memcpy( &component, componentPtr,
   1173 			sizeof(RF_SingleComponent_t));
   1174 		row = component.row;
   1175 		column = component.column;
   1176 		printf("Rebuild: %d %d\n",row, column);
   1177 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1178 		    (column < 0) || (column >= raidPtr->numCol)) {
   1179 			return(EINVAL);
   1180 		}
   1181 
   1182 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1183 		if (rrcopy == NULL)
   1184 			return(ENOMEM);
   1185 
   1186 		rrcopy->raidPtr = (void *) raidPtr;
   1187 		rrcopy->row = row;
   1188 		rrcopy->col = column;
   1189 
   1190 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1191 					   rf_ReconstructInPlaceThread,
   1192 					   rrcopy,"raid_reconip");
   1193 		return(retcode);
   1194 
   1195 	case RAIDFRAME_GET_INFO:
   1196 		if (!raidPtr->valid)
   1197 			return (ENODEV);
   1198 		ucfgp = (RF_DeviceConfig_t **) data;
   1199 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1200 			  (RF_DeviceConfig_t *));
   1201 		if (d_cfg == NULL)
   1202 			return (ENOMEM);
   1203 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1204 		d_cfg->rows = raidPtr->numRow;
   1205 		d_cfg->cols = raidPtr->numCol;
   1206 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1207 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1208 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1209 			return (ENOMEM);
   1210 		}
   1211 		d_cfg->nspares = raidPtr->numSpare;
   1212 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1213 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1214 			return (ENOMEM);
   1215 		}
   1216 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1217 		d = 0;
   1218 		for (i = 0; i < d_cfg->rows; i++) {
   1219 			for (j = 0; j < d_cfg->cols; j++) {
   1220 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1221 				d++;
   1222 			}
   1223 		}
   1224 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1225 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1226 		}
   1227 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1228 				  sizeof(RF_DeviceConfig_t));
   1229 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1230 
   1231 		return (retcode);
   1232 
   1233 	case RAIDFRAME_CHECK_PARITY:
   1234 		*(int *) data = raidPtr->parity_good;
   1235 		return (0);
   1236 
   1237 	case RAIDFRAME_RESET_ACCTOTALS:
   1238 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1239 		return (0);
   1240 
   1241 	case RAIDFRAME_GET_ACCTOTALS:
   1242 		totals = (RF_AccTotals_t *) data;
   1243 		*totals = raidPtr->acc_totals;
   1244 		return (0);
   1245 
   1246 	case RAIDFRAME_KEEP_ACCTOTALS:
   1247 		raidPtr->keep_acc_totals = *(int *)data;
   1248 		return (0);
   1249 
   1250 	case RAIDFRAME_GET_SIZE:
   1251 		*(int *) data = raidPtr->totalSectors;
   1252 		return (0);
   1253 
   1254 		/* fail a disk & optionally start reconstruction */
   1255 	case RAIDFRAME_FAIL_DISK:
   1256 
   1257 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1258 			/* Can't do this on a RAID 0!! */
   1259 			return(EINVAL);
   1260 		}
   1261 
   1262 		rr = (struct rf_recon_req *) data;
   1263 
   1264 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1265 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1266 			return (EINVAL);
   1267 
   1268 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1269 		       unit, rr->row, rr->col);
   1270 
   1271 		/* make a copy of the recon request so that we don't rely on
   1272 		 * the user's buffer */
   1273 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1274 		if (rrcopy == NULL)
   1275 			return(ENOMEM);
   1276 		memcpy(rrcopy, rr, sizeof(*rr));
   1277 		rrcopy->raidPtr = (void *) raidPtr;
   1278 
   1279 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1280 					   rf_ReconThread,
   1281 					   rrcopy,"raid_recon");
   1282 		return (0);
   1283 
   1284 		/* invoke a copyback operation after recon on whatever disk
   1285 		 * needs it, if any */
   1286 	case RAIDFRAME_COPYBACK:
   1287 
   1288 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1289 			/* This makes no sense on a RAID 0!! */
   1290 			return(EINVAL);
   1291 		}
   1292 
   1293 		if (raidPtr->copyback_in_progress == 1) {
   1294 			/* Copyback is already in progress! */
   1295 			return(EINVAL);
   1296 		}
   1297 
   1298 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1299 					   rf_CopybackThread,
   1300 					   raidPtr,"raid_copyback");
   1301 		return (retcode);
   1302 
   1303 		/* return the percentage completion of reconstruction */
   1304 	case RAIDFRAME_CHECK_RECON_STATUS:
   1305 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1306 			/* This makes no sense on a RAID 0, so tell the
   1307 			   user it's done. */
   1308 			*(int *) data = 100;
   1309 			return(0);
   1310 		}
   1311 		row = 0; /* XXX we only consider a single row... */
   1312 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1313 			*(int *) data = 100;
   1314 		else
   1315 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1316 		return (0);
   1317 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1318 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1319 		row = 0; /* XXX we only consider a single row... */
   1320 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1321 			progressInfo.remaining = 0;
   1322 			progressInfo.completed = 100;
   1323 			progressInfo.total = 100;
   1324 		} else {
   1325 			progressInfo.total =
   1326 				raidPtr->reconControl[row]->numRUsTotal;
   1327 			progressInfo.completed =
   1328 				raidPtr->reconControl[row]->numRUsComplete;
   1329 			progressInfo.remaining = progressInfo.total -
   1330 				progressInfo.completed;
   1331 		}
   1332 		retcode = copyout((caddr_t) &progressInfo,
   1333 				  (caddr_t) *progressInfoPtr,
   1334 				  sizeof(RF_ProgressInfo_t));
   1335 		return (retcode);
   1336 
   1337 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1338 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1339 			/* This makes no sense on a RAID 0, so tell the
   1340 			   user it's done. */
   1341 			*(int *) data = 100;
   1342 			return(0);
   1343 		}
   1344 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1345 			*(int *) data = 100 *
   1346 				raidPtr->parity_rewrite_stripes_done /
   1347 				raidPtr->Layout.numStripe;
   1348 		} else {
   1349 			*(int *) data = 100;
   1350 		}
   1351 		return (0);
   1352 
   1353 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1354 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1355 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1356 			progressInfo.total = raidPtr->Layout.numStripe;
   1357 			progressInfo.completed =
   1358 				raidPtr->parity_rewrite_stripes_done;
   1359 			progressInfo.remaining = progressInfo.total -
   1360 				progressInfo.completed;
   1361 		} else {
   1362 			progressInfo.remaining = 0;
   1363 			progressInfo.completed = 100;
   1364 			progressInfo.total = 100;
   1365 		}
   1366 		retcode = copyout((caddr_t) &progressInfo,
   1367 				  (caddr_t) *progressInfoPtr,
   1368 				  sizeof(RF_ProgressInfo_t));
   1369 		return (retcode);
   1370 
   1371 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1372 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1373 			/* This makes no sense on a RAID 0 */
   1374 			*(int *) data = 100;
   1375 			return(0);
   1376 		}
   1377 		if (raidPtr->copyback_in_progress == 1) {
   1378 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1379 				raidPtr->Layout.numStripe;
   1380 		} else {
   1381 			*(int *) data = 100;
   1382 		}
   1383 		return (0);
   1384 
   1385 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1386 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1387 		if (raidPtr->copyback_in_progress == 1) {
   1388 			progressInfo.total = raidPtr->Layout.numStripe;
   1389 			progressInfo.completed =
   1390 				raidPtr->copyback_stripes_done;
   1391 			progressInfo.remaining = progressInfo.total -
   1392 				progressInfo.completed;
   1393 		} else {
   1394 			progressInfo.remaining = 0;
   1395 			progressInfo.completed = 100;
   1396 			progressInfo.total = 100;
   1397 		}
   1398 		retcode = copyout((caddr_t) &progressInfo,
   1399 				  (caddr_t) *progressInfoPtr,
   1400 				  sizeof(RF_ProgressInfo_t));
   1401 		return (retcode);
   1402 
   1403 		/* the sparetable daemon calls this to wait for the kernel to
   1404 		 * need a spare table. this ioctl does not return until a
   1405 		 * spare table is needed. XXX -- calling mpsleep here in the
   1406 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1407 		 * -- I should either compute the spare table in the kernel,
   1408 		 * or have a different -- XXX XXX -- interface (a different
   1409 		 * character device) for delivering the table     -- XXX */
   1410 #if 0
   1411 	case RAIDFRAME_SPARET_WAIT:
   1412 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1413 		while (!rf_sparet_wait_queue)
   1414 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1415 		waitreq = rf_sparet_wait_queue;
   1416 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1417 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1418 
   1419 		/* structure assignment */
   1420 		*((RF_SparetWait_t *) data) = *waitreq;
   1421 
   1422 		RF_Free(waitreq, sizeof(*waitreq));
   1423 		return (0);
   1424 
   1425 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1426 		 * code in it that will cause the daemon to exit */
   1427 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1428 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1429 		waitreq->fcol = -1;
   1430 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1431 		waitreq->next = rf_sparet_wait_queue;
   1432 		rf_sparet_wait_queue = waitreq;
   1433 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1434 		wakeup(&rf_sparet_wait_queue);
   1435 		return (0);
   1436 
   1437 		/* used by the spare table daemon to deliver a spare table
   1438 		 * into the kernel */
   1439 	case RAIDFRAME_SEND_SPARET:
   1440 
   1441 		/* install the spare table */
   1442 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1443 
   1444 		/* respond to the requestor.  the return status of the spare
   1445 		 * table installation is passed in the "fcol" field */
   1446 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1447 		waitreq->fcol = retcode;
   1448 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1449 		waitreq->next = rf_sparet_resp_queue;
   1450 		rf_sparet_resp_queue = waitreq;
   1451 		wakeup(&rf_sparet_resp_queue);
   1452 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1453 
   1454 		return (retcode);
   1455 #endif
   1456 
   1457 	default:
   1458 		break; /* fall through to the os-specific code below */
   1459 
   1460 	}
   1461 
   1462 	if (!raidPtr->valid)
   1463 		return (EINVAL);
   1464 
   1465 	/*
   1466 	 * Add support for "regular" device ioctls here.
   1467 	 */
   1468 
   1469 	switch (cmd) {
   1470 	case DIOCGDINFO:
   1471 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1472 		break;
   1473 #ifdef __HAVE_OLD_DISKLABEL
   1474 	case ODIOCGDINFO:
   1475 		newlabel = *(rs->sc_dkdev.dk_label);
   1476 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1477 			return ENOTTY;
   1478 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1479 		break;
   1480 #endif
   1481 
   1482 	case DIOCGPART:
   1483 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1484 		((struct partinfo *) data)->part =
   1485 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1486 		break;
   1487 
   1488 	case DIOCWDINFO:
   1489 	case DIOCSDINFO:
   1490 #ifdef __HAVE_OLD_DISKLABEL
   1491 	case ODIOCWDINFO:
   1492 	case ODIOCSDINFO:
   1493 #endif
   1494 	{
   1495 		struct disklabel *lp;
   1496 #ifdef __HAVE_OLD_DISKLABEL
   1497 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1498 			memset(&newlabel, 0, sizeof newlabel);
   1499 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1500 			lp = &newlabel;
   1501 		} else
   1502 #endif
   1503 		lp = (struct disklabel *)data;
   1504 
   1505 		if ((error = raidlock(rs)) != 0)
   1506 			return (error);
   1507 
   1508 		rs->sc_flags |= RAIDF_LABELLING;
   1509 
   1510 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1511 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1512 		if (error == 0) {
   1513 			if (cmd == DIOCWDINFO
   1514 #ifdef __HAVE_OLD_DISKLABEL
   1515 			    || cmd == ODIOCWDINFO
   1516 #endif
   1517 			   )
   1518 				error = writedisklabel(RAIDLABELDEV(dev),
   1519 				    raidstrategy, rs->sc_dkdev.dk_label,
   1520 				    rs->sc_dkdev.dk_cpulabel);
   1521 		}
   1522 		rs->sc_flags &= ~RAIDF_LABELLING;
   1523 
   1524 		raidunlock(rs);
   1525 
   1526 		if (error)
   1527 			return (error);
   1528 		break;
   1529 	}
   1530 
   1531 	case DIOCWLABEL:
   1532 		if (*(int *) data != 0)
   1533 			rs->sc_flags |= RAIDF_WLABEL;
   1534 		else
   1535 			rs->sc_flags &= ~RAIDF_WLABEL;
   1536 		break;
   1537 
   1538 	case DIOCGDEFLABEL:
   1539 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1540 		break;
   1541 
   1542 #ifdef __HAVE_OLD_DISKLABEL
   1543 	case ODIOCGDEFLABEL:
   1544 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1545 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1546 			return ENOTTY;
   1547 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1548 		break;
   1549 #endif
   1550 
   1551 	default:
   1552 		retcode = ENOTTY;
   1553 	}
   1554 	return (retcode);
   1555 
   1556 }
   1557 
   1558 
   1559 /* raidinit -- complete the rest of the initialization for the
   1560    RAIDframe device.  */
   1561 
   1562 
   1563 static void
   1564 raidinit(raidPtr)
   1565 	RF_Raid_t *raidPtr;
   1566 {
   1567 	struct raid_softc *rs;
   1568 	int     unit;
   1569 
   1570 	unit = raidPtr->raidid;
   1571 
   1572 	rs = &raid_softc[unit];
   1573 
   1574 	/* XXX should check return code first... */
   1575 	rs->sc_flags |= RAIDF_INITED;
   1576 
   1577 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1578 
   1579 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1580 
   1581 	/* disk_attach actually creates space for the CPU disklabel, among
   1582 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1583 	 * with disklabels. */
   1584 
   1585 	disk_attach(&rs->sc_dkdev);
   1586 
   1587 	/* XXX There may be a weird interaction here between this, and
   1588 	 * protectedSectors, as used in RAIDframe.  */
   1589 
   1590 	rs->sc_size = raidPtr->totalSectors;
   1591 
   1592 }
   1593 
   1594 /* wake up the daemon & tell it to get us a spare table
   1595  * XXX
   1596  * the entries in the queues should be tagged with the raidPtr
   1597  * so that in the extremely rare case that two recons happen at once,
   1598  * we know for which device were requesting a spare table
   1599  * XXX
   1600  *
   1601  * XXX This code is not currently used. GO
   1602  */
   1603 int
   1604 rf_GetSpareTableFromDaemon(req)
   1605 	RF_SparetWait_t *req;
   1606 {
   1607 	int     retcode;
   1608 
   1609 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1610 	req->next = rf_sparet_wait_queue;
   1611 	rf_sparet_wait_queue = req;
   1612 	wakeup(&rf_sparet_wait_queue);
   1613 
   1614 	/* mpsleep unlocks the mutex */
   1615 	while (!rf_sparet_resp_queue) {
   1616 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1617 		    "raidframe getsparetable", 0);
   1618 	}
   1619 	req = rf_sparet_resp_queue;
   1620 	rf_sparet_resp_queue = req->next;
   1621 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1622 
   1623 	retcode = req->fcol;
   1624 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1625 					 * alloc'd */
   1626 	return (retcode);
   1627 }
   1628 
    1629 /* raidstart -- drain rs->buf_queue while raidPtr->openings > 0.
    1630  * Each buf's partition-relative b_blkno is made absolute, the request is
    1631  * range- and alignment-checked (failed with ENOSPC / EINVAL via biodone()),
    1632  * and then passed to rf_DoAccess() as async, non-blocking I/O.
    1633  * Component labels are updated first if numNewFailures > 0.
    1634  * raidPtr->mutex is taken and released internally.
    1635  * Formerly known as: rf_DoAccessKernel
    1636  */
    1637 void
    1638 raidstart(raidPtr)
    1639 	RF_Raid_t *raidPtr;
    1640 {
    1641 	RF_SectorCount_t num_blocks, pb, sum;
    1642 	RF_RaidAddr_t raid_addr;
    1643 	int     retcode;
    1644 	struct partition *pp;
    1645 	daddr_t blocknum;
    1646 	int     unit;
    1647 	struct raid_softc *rs;
    1648 	int     do_async;
    1649 	struct buf *bp;
    1650 
    1651 	unit = raidPtr->raidid;
    1652 	rs = &raid_softc[unit];
    1653 
    1654 	/* anything died recently?  Handles one new failure per call. */
    1655 	RF_LOCK_MUTEX(raidPtr->mutex);
    1656 	if (raidPtr->numNewFailures > 0) {
    1657 		rf_update_component_labels(raidPtr,
    1658 					   RF_NORMAL_COMPONENT_UPDATE);
    1659 		raidPtr->numNewFailures--;
    1660 	}
    1661 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    1662 
    1663 	/* Loop while openings remain; the mutex is held for each test */
    1664 	RF_LOCK_MUTEX(raidPtr->mutex);
    1665 	while (raidPtr->openings > 0) {
    1666 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1667 
    1668 		/* get the next item, if any, from the queue */
    1669 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
    1670 			/* queue is empty; mutex was already dropped above */
    1671 			return;
    1672 		}
    1673 		BUFQ_REMOVE(&rs->buf_queue, bp);
    1674 
    1675 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
    1676 		 * partition.. Need to make it absolute to the underlying
    1677 		 * device (skipped when the buf targets RAW_PART). */
    1678 
    1679 		blocknum = bp->b_blkno;
    1680 		if (DISKPART(bp->b_dev) != RAW_PART) {
    1681 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
    1682 			blocknum += pp->p_offset;
    1683 		}
    1684 
    1685 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
    1686 			    (int) blocknum));
    1687 
    1688 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
    1689 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
    1690 
    1691 		/* *THIS* is where we adjust what block we're going to...
    1692 		 * but DO NOT TOUCH bp->b_blkno!!! */
    1693 		raid_addr = blocknum;
    1694 
    1695 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
    1696 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
    1697 		sum = raid_addr + num_blocks + pb;
    1698 		if (1 || rf_debugKernelAccess) {	/* "1 ||": always taken */
    1699 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
    1700 				    (int) raid_addr, (int) sum, (int) num_blocks,
    1701 				    (int) pb, (int) bp->b_resid));
    1702 		}
    1703 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
    1704 		    || (sum < num_blocks) || (sum < pb)) {
    1705 			bp->b_error = ENOSPC;
    1706 			bp->b_flags |= B_ERROR;
    1707 			bp->b_resid = bp->b_bcount;
    1708 			biodone(bp);
    1709 			RF_LOCK_MUTEX(raidPtr->mutex);
    1710 			continue;
    1711 		}
    1712 		/* Fail any request with b_bcount bits set under sectorMask.
    1713 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
    1714 		 */
    1715 
    1716 		if (bp->b_bcount & raidPtr->sectorMask) {
    1717 			bp->b_error = EINVAL;
    1718 			bp->b_flags |= B_ERROR;
    1719 			bp->b_resid = bp->b_bcount;
    1720 			biodone(bp);
    1721 			RF_LOCK_MUTEX(raidPtr->mutex);
    1722 			continue;
    1723 
    1724 		}
    1725 		db1_printf(("Calling DoAccess..\n"));
    1726 
    1727 		/* claim one opening for this request */
    1728 		RF_LOCK_MUTEX(raidPtr->mutex);
    1729 		raidPtr->openings--;
    1730 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1731 
    1732 		/*
    1733 		 * Everything is async.
    1734 		 */
    1735 		do_async = 1;
    1736 
    1737 		disk_busy(&rs->sc_dkdev);
    1738 
    1739 		/* XXX we're still at splbio() here... do we *really*
    1740 		   need to be? */
    1741 
    1742 		/* don't ever condition on bp->b_flags & B_WRITE.
    1743 		 * always condition on B_READ instead */
    1744 
    1745 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
    1746 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
    1747 				      do_async, raid_addr, num_blocks,
    1748 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
    1749 		/* NOTE(review): retcode is never examined after this call */
    1750 		RF_LOCK_MUTEX(raidPtr->mutex);
    1751 	}
    1752 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    1753 }
   1754 
   1755 
   1756 
   1757 
   1758 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1759 
   1760 int
   1761 rf_DispatchKernelIO(queue, req)
   1762 	RF_DiskQueue_t *queue;
   1763 	RF_DiskQueueData_t *req;
   1764 {
   1765 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1766 	struct buf *bp;
   1767 	struct raidbuf *raidbp = NULL;
   1768 	struct raid_softc *rs;
   1769 	int     unit;
   1770 	int s;
   1771 
   1772 	s=0;
   1773 	/* s = splbio();*/ /* want to test this */
   1774 	/* XXX along with the vnode, we also need the softc associated with
   1775 	 * this device.. */
   1776 
   1777 	req->queue = queue;
   1778 
   1779 	unit = queue->raidPtr->raidid;
   1780 
   1781 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1782 
   1783 	if (unit >= numraid) {
   1784 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1785 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1786 	}
   1787 	rs = &raid_softc[unit];
   1788 
   1789 	bp = req->bp;
   1790 #if 1
   1791 	/* XXX when there is a physical disk failure, someone is passing us a
   1792 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1793 	 * without taking a performance hit... (not sure where the real bug
   1794 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1795 
   1796 	if (bp->b_flags & B_ERROR) {
   1797 		bp->b_flags &= ~B_ERROR;
   1798 	}
   1799 	if (bp->b_error != 0) {
   1800 		bp->b_error = 0;
   1801 	}
   1802 #endif
   1803 	raidbp = RAIDGETBUF(rs);
   1804 
   1805 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1806 
   1807 	/*
   1808 	 * context for raidiodone
   1809 	 */
   1810 	raidbp->rf_obp = bp;
   1811 	raidbp->req = req;
   1812 
   1813 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1814 
   1815 	switch (req->type) {
   1816 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1817 		/* XXX need to do something extra here.. */
   1818 		/* I'm leaving this in, as I've never actually seen it used,
   1819 		 * and I'd like folks to report it... GO */
   1820 		printf(("WAKEUP CALLED\n"));
   1821 		queue->numOutstanding++;
   1822 
   1823 		/* XXX need to glue the original buffer into this??  */
   1824 
   1825 		KernelWakeupFunc(&raidbp->rf_buf);
   1826 		break;
   1827 
   1828 	case RF_IO_TYPE_READ:
   1829 	case RF_IO_TYPE_WRITE:
   1830 
   1831 		if (req->tracerec) {
   1832 			RF_ETIMER_START(req->tracerec->timer);
   1833 		}
   1834 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1835 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1836 		    req->sectorOffset, req->numSector,
   1837 		    req->buf, KernelWakeupFunc, (void *) req,
   1838 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1839 
   1840 		if (rf_debugKernelAccess) {
   1841 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1842 				(long) bp->b_blkno));
   1843 		}
   1844 		queue->numOutstanding++;
   1845 		queue->last_deq_sector = req->sectorOffset;
   1846 		/* acc wouldn't have been let in if there were any pending
   1847 		 * reqs at any other priority */
   1848 		queue->curPriority = req->priority;
   1849 
   1850 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1851 			req->type, unit, queue->row, queue->col));
   1852 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1853 			(int) req->sectorOffset, (int) req->numSector,
   1854 			(int) (req->numSector <<
   1855 			    queue->raidPtr->logBytesPerSector),
   1856 			(int) queue->raidPtr->logBytesPerSector));
   1857 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1858 			raidbp->rf_buf.b_vp->v_numoutput++;
   1859 		}
   1860 		VOP_STRATEGY(&raidbp->rf_buf);
   1861 
   1862 		break;
   1863 
   1864 	default:
   1865 		panic("bad req->type in rf_DispatchKernelIO");
   1866 	}
   1867 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1868 	/* splx(s); */ /* want to test this */
   1869 	return (0);
   1870 }
   1871 /* this is the callback function associated with a I/O invoked from
   1872    kernel code.
   1873  */
   1874 static void
   1875 KernelWakeupFunc(vbp)
   1876 	struct buf *vbp;
   1877 {
   1878 	RF_DiskQueueData_t *req = NULL;
   1879 	RF_DiskQueue_t *queue;
   1880 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1881 	struct buf *bp;
   1882 	struct raid_softc *rs;
   1883 	int     unit;
   1884 	int s;
   1885 
   1886 	s = splbio();
   1887 	db1_printf(("recovering the request queue:\n"));
   1888 	req = raidbp->req;
   1889 
   1890 	bp = raidbp->rf_obp;
   1891 
   1892 	queue = (RF_DiskQueue_t *) req->queue;
   1893 
   1894 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1895 		bp->b_flags |= B_ERROR;
   1896 		bp->b_error = raidbp->rf_buf.b_error ?
   1897 		    raidbp->rf_buf.b_error : EIO;
   1898 	}
   1899 
   1900 	/* XXX methinks this could be wrong... */
   1901 #if 1
   1902 	bp->b_resid = raidbp->rf_buf.b_resid;
   1903 #endif
   1904 
   1905 	if (req->tracerec) {
   1906 		RF_ETIMER_STOP(req->tracerec->timer);
   1907 		RF_ETIMER_EVAL(req->tracerec->timer);
   1908 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1909 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1910 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1911 		req->tracerec->num_phys_ios++;
   1912 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1913 	}
   1914 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1915 
   1916 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1917 
   1918 
   1919 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1920 	 * ballistic, and mark the component as hosed... */
   1921 
   1922 	if (bp->b_flags & B_ERROR) {
   1923 		/* Mark the disk as dead */
   1924 		/* but only mark it once... */
   1925 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1926 		    rf_ds_optimal) {
   1927 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1928 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1929 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1930 			    rf_ds_failed;
   1931 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1932 			queue->raidPtr->numFailures++;
   1933 			queue->raidPtr->numNewFailures++;
   1934 		} else {	/* Disk is already dead... */
   1935 			/* printf("Disk already marked as dead!\n"); */
   1936 		}
   1937 
   1938 	}
   1939 
   1940 	rs = &raid_softc[unit];
   1941 	RAIDPUTBUF(rs, raidbp);
   1942 
   1943 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1944 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1945 
   1946 	splx(s);
   1947 }
   1948 
   1949 
   1950 
   1951 /*
   1952  * initialize a buf structure for doing an I/O in the kernel.
   1953  */
   1954 static void
   1955 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1956        logBytesPerSector, b_proc)
   1957 	struct buf *bp;
   1958 	struct vnode *b_vp;
   1959 	unsigned rw_flag;
   1960 	dev_t dev;
   1961 	RF_SectorNum_t startSect;
   1962 	RF_SectorCount_t numSect;
   1963 	caddr_t buf;
   1964 	void (*cbFunc) (struct buf *);
   1965 	void *cbArg;
   1966 	int logBytesPerSector;
   1967 	struct proc *b_proc;
   1968 {
   1969 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1970 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1971 	bp->b_bcount = numSect << logBytesPerSector;
   1972 	bp->b_bufsize = bp->b_bcount;
   1973 	bp->b_error = 0;
   1974 	bp->b_dev = dev;
   1975 	bp->b_data = buf;
   1976 	bp->b_blkno = startSect;
   1977 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1978 	if (bp->b_bcount == 0) {
   1979 		panic("bp->b_bcount is zero in InitBP!!\n");
   1980 	}
   1981 	bp->b_proc = b_proc;
   1982 	bp->b_iodone = cbFunc;
   1983 	bp->b_vp = b_vp;
   1984 
   1985 }
   1986 
   1987 static void
   1988 raidgetdefaultlabel(raidPtr, rs, lp)
   1989 	RF_Raid_t *raidPtr;
   1990 	struct raid_softc *rs;
   1991 	struct disklabel *lp;
   1992 {
   1993 	db1_printf(("Building a default label...\n"));
   1994 	memset(lp, 0, sizeof(*lp));
   1995 
   1996 	/* fabricate a label... */
   1997 	lp->d_secperunit = raidPtr->totalSectors;
   1998 	lp->d_secsize = raidPtr->bytesPerSector;
   1999 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2000 	lp->d_ntracks = 4 * raidPtr->numCol;
   2001 	lp->d_ncylinders = raidPtr->totalSectors /
   2002 		(lp->d_nsectors * lp->d_ntracks);
   2003 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2004 
   2005 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2006 	lp->d_type = DTYPE_RAID;
   2007 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2008 	lp->d_rpm = 3600;
   2009 	lp->d_interleave = 1;
   2010 	lp->d_flags = 0;
   2011 
   2012 	lp->d_partitions[RAW_PART].p_offset = 0;
   2013 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2014 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2015 	lp->d_npartitions = RAW_PART + 1;
   2016 
   2017 	lp->d_magic = DISKMAGIC;
   2018 	lp->d_magic2 = DISKMAGIC;
   2019 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2020 
   2021 }
   2022 /*
   2023  * Read the disklabel from the raid device.  If one is not present, fake one
   2024  * up.
   2025  */
   2026 static void
   2027 raidgetdisklabel(dev)
   2028 	dev_t   dev;
   2029 {
   2030 	int     unit = raidunit(dev);
   2031 	struct raid_softc *rs = &raid_softc[unit];
   2032 	char   *errstring;
   2033 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2034 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2035 	RF_Raid_t *raidPtr;
   2036 
   2037 	db1_printf(("Getting the disklabel...\n"));
   2038 
   2039 	memset(clp, 0, sizeof(*clp));
   2040 
   2041 	raidPtr = raidPtrs[unit];
   2042 
   2043 	raidgetdefaultlabel(raidPtr, rs, lp);
   2044 
   2045 	/*
   2046 	 * Call the generic disklabel extraction routine.
   2047 	 */
   2048 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2049 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2050 	if (errstring)
   2051 		raidmakedisklabel(rs);
   2052 	else {
   2053 		int     i;
   2054 		struct partition *pp;
   2055 
   2056 		/*
   2057 		 * Sanity check whether the found disklabel is valid.
   2058 		 *
   2059 		 * This is necessary since total size of the raid device
   2060 		 * may vary when an interleave is changed even though exactly
   2061 		 * same componets are used, and old disklabel may used
   2062 		 * if that is found.
   2063 		 */
   2064 		if (lp->d_secperunit != rs->sc_size)
   2065 			printf("WARNING: %s: "
   2066 			    "total sector size in disklabel (%d) != "
   2067 			    "the size of raid (%ld)\n", rs->sc_xname,
   2068 			    lp->d_secperunit, (long) rs->sc_size);
   2069 		for (i = 0; i < lp->d_npartitions; i++) {
   2070 			pp = &lp->d_partitions[i];
   2071 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2072 				printf("WARNING: %s: end of partition `%c' "
   2073 				    "exceeds the size of raid (%ld)\n",
   2074 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2075 		}
   2076 	}
   2077 
   2078 }
   2079 /*
   2080  * Take care of things one might want to take care of in the event
   2081  * that a disklabel isn't present.
   2082  */
   2083 static void
   2084 raidmakedisklabel(rs)
   2085 	struct raid_softc *rs;
   2086 {
   2087 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2088 	db1_printf(("Making a label..\n"));
   2089 
   2090 	/*
   2091 	 * For historical reasons, if there's no disklabel present
   2092 	 * the raw partition must be marked FS_BSDFFS.
   2093 	 */
   2094 
   2095 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2096 
   2097 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2098 
   2099 	lp->d_checksum = dkcksum(lp);
   2100 }
   2101 /*
   2102  * Lookup the provided name in the filesystem.  If the file exists,
   2103  * is a valid block device, and isn't being used by anyone else,
   2104  * set *vpp to the file's vnode.
   2105  * You'll find the original of this in ccd.c
   2106  */
   2107 int
   2108 raidlookup(path, p, vpp)
   2109 	char   *path;
   2110 	struct proc *p;
   2111 	struct vnode **vpp;	/* result */
   2112 {
   2113 	struct nameidata nd;
   2114 	struct vnode *vp;
   2115 	struct vattr va;
   2116 	int     error;
   2117 
   2118 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2119 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2120 #ifdef DEBUG
   2121 		printf("RAIDframe: vn_open returned %d\n", error);
   2122 #endif
   2123 		return (error);
   2124 	}
   2125 	vp = nd.ni_vp;
   2126 	if (vp->v_usecount > 1) {
   2127 		VOP_UNLOCK(vp, 0);
   2128 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2129 		return (EBUSY);
   2130 	}
   2131 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2132 		VOP_UNLOCK(vp, 0);
   2133 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2134 		return (error);
   2135 	}
   2136 	/* XXX: eventually we should handle VREG, too. */
   2137 	if (va.va_type != VBLK) {
   2138 		VOP_UNLOCK(vp, 0);
   2139 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2140 		return (ENOTBLK);
   2141 	}
   2142 	VOP_UNLOCK(vp, 0);
   2143 	*vpp = vp;
   2144 	return (0);
   2145 }
   2146 /*
   2147  * Wait interruptibly for an exclusive lock.
   2148  *
   2149  * XXX
   2150  * Several drivers do this; it should be abstracted and made MP-safe.
   2151  * (Hmm... where have we seen this warning before :->  GO )
   2152  */
   2153 static int
   2154 raidlock(rs)
   2155 	struct raid_softc *rs;
   2156 {
   2157 	int     error;
   2158 
   2159 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2160 		rs->sc_flags |= RAIDF_WANTED;
   2161 		if ((error =
   2162 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2163 			return (error);
   2164 	}
   2165 	rs->sc_flags |= RAIDF_LOCKED;
   2166 	return (0);
   2167 }
   2168 /*
   2169  * Unlock and wake up any waiters.
   2170  */
   2171 static void
   2172 raidunlock(rs)
   2173 	struct raid_softc *rs;
   2174 {
   2175 
   2176 	rs->sc_flags &= ~RAIDF_LOCKED;
   2177 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2178 		rs->sc_flags &= ~RAIDF_WANTED;
   2179 		wakeup(rs);
   2180 	}
   2181 }
   2182 
   2183 
/* location and size, in bytes, of the component label on each component */
#define RF_COMPONENT_INFO_OFFSET  (16 * 1024)
#define RF_COMPONENT_INFO_SIZE    (1024)
   2186 
   2187 int
   2188 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2189 {
   2190 	RF_ComponentLabel_t clabel;
   2191 	raidread_component_label(dev, b_vp, &clabel);
   2192 	clabel.mod_counter = mod_counter;
   2193 	clabel.clean = RF_RAID_CLEAN;
   2194 	raidwrite_component_label(dev, b_vp, &clabel);
   2195 	return(0);
   2196 }
   2197 
   2198 
   2199 int
   2200 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2201 {
   2202 	RF_ComponentLabel_t clabel;
   2203 	raidread_component_label(dev, b_vp, &clabel);
   2204 	clabel.mod_counter = mod_counter;
   2205 	clabel.clean = RF_RAID_DIRTY;
   2206 	raidwrite_component_label(dev, b_vp, &clabel);
   2207 	return(0);
   2208 }
   2209 
   2210 /* ARGSUSED */
   2211 int
   2212 raidread_component_label(dev, b_vp, clabel)
   2213 	dev_t dev;
   2214 	struct vnode *b_vp;
   2215 	RF_ComponentLabel_t *clabel;
   2216 {
   2217 	struct buf *bp;
   2218 	int error;
   2219 
   2220 	/* XXX should probably ensure that we don't try to do this if
   2221 	   someone has changed rf_protected_sectors. */
   2222 
   2223 	if (b_vp == NULL) {
   2224 		/* For whatever reason, this component is not valid.
   2225 		   Don't try to read a component label from it. */
   2226 		return(EINVAL);
   2227 	}
   2228 
   2229 	/* get a block of the appropriate size... */
   2230 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2231 	bp->b_dev = dev;
   2232 
   2233 	/* get our ducks in a row for the read */
   2234 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2235 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2236 	bp->b_flags |= B_READ;
   2237  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2238 
   2239 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2240 
   2241 	error = biowait(bp);
   2242 
   2243 	if (!error) {
   2244 		memcpy(clabel, bp->b_data,
   2245 		       sizeof(RF_ComponentLabel_t));
   2246 #if 0
   2247 		rf_print_component_label( clabel );
   2248 #endif
   2249         } else {
   2250 #if 0
   2251 		printf("Failed to read RAID component label!\n");
   2252 #endif
   2253 	}
   2254 
   2255 	brelse(bp);
   2256 	return(error);
   2257 }
   2258 /* ARGSUSED */
   2259 int
   2260 raidwrite_component_label(dev, b_vp, clabel)
   2261 	dev_t dev;
   2262 	struct vnode *b_vp;
   2263 	RF_ComponentLabel_t *clabel;
   2264 {
   2265 	struct buf *bp;
   2266 	int error;
   2267 
   2268 	/* get a block of the appropriate size... */
   2269 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2270 	bp->b_dev = dev;
   2271 
   2272 	/* get our ducks in a row for the write */
   2273 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2274 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2275 	bp->b_flags |= B_WRITE;
   2276  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2277 
   2278 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2279 
   2280 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2281 
   2282 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2283 	error = biowait(bp);
   2284 	brelse(bp);
   2285 	if (error) {
   2286 #if 1
   2287 		printf("Failed to write RAID component info!\n");
   2288 #endif
   2289 	}
   2290 
   2291 	return(error);
   2292 }
   2293 
   2294 void
   2295 rf_markalldirty(raidPtr)
   2296 	RF_Raid_t *raidPtr;
   2297 {
   2298 	RF_ComponentLabel_t clabel;
   2299 	int r,c;
   2300 
   2301 	raidPtr->mod_counter++;
   2302 	for (r = 0; r < raidPtr->numRow; r++) {
   2303 		for (c = 0; c < raidPtr->numCol; c++) {
   2304 			/* we don't want to touch (at all) a disk that has
   2305 			   failed */
   2306 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2307 				raidread_component_label(
   2308 					raidPtr->Disks[r][c].dev,
   2309 					raidPtr->raid_cinfo[r][c].ci_vp,
   2310 					&clabel);
   2311 				if (clabel.status == rf_ds_spared) {
   2312 					/* XXX do something special...
   2313 					 but whatever you do, don't
   2314 					 try to access it!! */
   2315 				} else {
   2316 #if 0
   2317 				clabel.status =
   2318 					raidPtr->Disks[r][c].status;
   2319 				raidwrite_component_label(
   2320 					raidPtr->Disks[r][c].dev,
   2321 					raidPtr->raid_cinfo[r][c].ci_vp,
   2322 					&clabel);
   2323 #endif
   2324 				raidmarkdirty(
   2325 				       raidPtr->Disks[r][c].dev,
   2326 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2327 				       raidPtr->mod_counter);
   2328 				}
   2329 			}
   2330 		}
   2331 	}
   2332 	/* printf("Component labels marked dirty.\n"); */
   2333 #if 0
   2334 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2335 		sparecol = raidPtr->numCol + c;
   2336 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2337 			/*
   2338 
   2339 			   XXX this is where we get fancy and map this spare
   2340 			   into it's correct spot in the array.
   2341 
   2342 			 */
   2343 			/*
   2344 
   2345 			   we claim this disk is "optimal" if it's
   2346 			   rf_ds_used_spare, as that means it should be
   2347 			   directly substitutable for the disk it replaced.
   2348 			   We note that too...
   2349 
   2350 			 */
   2351 
   2352 			for(i=0;i<raidPtr->numRow;i++) {
   2353 				for(j=0;j<raidPtr->numCol;j++) {
   2354 					if ((raidPtr->Disks[i][j].spareRow ==
   2355 					     r) &&
   2356 					    (raidPtr->Disks[i][j].spareCol ==
   2357 					     sparecol)) {
   2358 						srow = r;
   2359 						scol = sparecol;
   2360 						break;
   2361 					}
   2362 				}
   2363 			}
   2364 
   2365 			raidread_component_label(
   2366 				      raidPtr->Disks[r][sparecol].dev,
   2367 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2368 				      &clabel);
   2369 			/* make sure status is noted */
   2370 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2371 			clabel.mod_counter = raidPtr->mod_counter;
   2372 			clabel.serial_number = raidPtr->serial_number;
   2373 			clabel.row = srow;
   2374 			clabel.column = scol;
   2375 			clabel.num_rows = raidPtr->numRow;
   2376 			clabel.num_columns = raidPtr->numCol;
   2377 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2378 			clabel.status = rf_ds_optimal;
   2379 			raidwrite_component_label(
   2380 				      raidPtr->Disks[r][sparecol].dev,
   2381 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2382 				      &clabel);
   2383 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2384 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2385 		}
   2386 	}
   2387 
   2388 #endif
   2389 }
   2390 
   2391 
   2392 void
   2393 rf_update_component_labels(raidPtr, final)
   2394 	RF_Raid_t *raidPtr;
   2395 	int final;
   2396 {
   2397 	RF_ComponentLabel_t clabel;
   2398 	int sparecol;
   2399 	int r,c;
   2400 	int i,j;
   2401 	int srow, scol;
   2402 
   2403 	srow = -1;
   2404 	scol = -1;
   2405 
   2406 	/* XXX should do extra checks to make sure things really are clean,
   2407 	   rather than blindly setting the clean bit... */
   2408 
   2409 	raidPtr->mod_counter++;
   2410 
   2411 	for (r = 0; r < raidPtr->numRow; r++) {
   2412 		for (c = 0; c < raidPtr->numCol; c++) {
   2413 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2414 				raidread_component_label(
   2415 					raidPtr->Disks[r][c].dev,
   2416 					raidPtr->raid_cinfo[r][c].ci_vp,
   2417 					&clabel);
   2418 				/* make sure status is noted */
   2419 				clabel.status = rf_ds_optimal;
   2420 				/* bump the counter */
   2421 				clabel.mod_counter = raidPtr->mod_counter;
   2422 
   2423 				raidwrite_component_label(
   2424 					raidPtr->Disks[r][c].dev,
   2425 					raidPtr->raid_cinfo[r][c].ci_vp,
   2426 					&clabel);
   2427 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2428 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2429 						raidmarkclean(
   2430 							      raidPtr->Disks[r][c].dev,
   2431 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2432 							      raidPtr->mod_counter);
   2433 					}
   2434 				}
   2435 			}
   2436 			/* else we don't touch it.. */
   2437 		}
   2438 	}
   2439 
   2440 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2441 		sparecol = raidPtr->numCol + c;
   2442 		/* Need to ensure that the reconstruct actually completed! */
   2443 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2444 			/*
   2445 
   2446 			   we claim this disk is "optimal" if it's
   2447 			   rf_ds_used_spare, as that means it should be
   2448 			   directly substitutable for the disk it replaced.
   2449 			   We note that too...
   2450 
   2451 			 */
   2452 
   2453 			for(i=0;i<raidPtr->numRow;i++) {
   2454 				for(j=0;j<raidPtr->numCol;j++) {
   2455 					if ((raidPtr->Disks[i][j].spareRow ==
   2456 					     0) &&
   2457 					    (raidPtr->Disks[i][j].spareCol ==
   2458 					     sparecol)) {
   2459 						srow = i;
   2460 						scol = j;
   2461 						break;
   2462 					}
   2463 				}
   2464 			}
   2465 
   2466 			/* XXX shouldn't *really* need this... */
   2467 			raidread_component_label(
   2468 				      raidPtr->Disks[0][sparecol].dev,
   2469 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2470 				      &clabel);
   2471 			/* make sure status is noted */
   2472 
   2473 			raid_init_component_label(raidPtr, &clabel);
   2474 
   2475 			clabel.mod_counter = raidPtr->mod_counter;
   2476 			clabel.row = srow;
   2477 			clabel.column = scol;
   2478 			clabel.status = rf_ds_optimal;
   2479 
   2480 			raidwrite_component_label(
   2481 				      raidPtr->Disks[0][sparecol].dev,
   2482 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2483 				      &clabel);
   2484 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2485 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2486 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2487 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2488 						       raidPtr->mod_counter);
   2489 				}
   2490 			}
   2491 		}
   2492 	}
   2493 	/* 	printf("Component labels updated\n"); */
   2494 }
   2495 
   2496 void
   2497 rf_close_component(raidPtr, vp, auto_configured)
   2498 	RF_Raid_t *raidPtr;
   2499 	struct vnode *vp;
   2500 	int auto_configured;
   2501 {
   2502 	struct proc *p;
   2503 
   2504 	p = raidPtr->engine_thread;
   2505 
   2506 	if (vp != NULL) {
   2507 		if (auto_configured == 1) {
   2508 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2509 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2510 			vput(vp);
   2511 
   2512 		} else {
   2513 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2514 		}
   2515 	} else {
   2516 		printf("vnode was NULL\n");
   2517 	}
   2518 }
   2519 
   2520 
   2521 void
   2522 rf_UnconfigureVnodes(raidPtr)
   2523 	RF_Raid_t *raidPtr;
   2524 {
   2525 	int r,c;
   2526 	struct proc *p;
   2527 	struct vnode *vp;
   2528 	int acd;
   2529 
   2530 
   2531 	/* We take this opportunity to close the vnodes like we should.. */
   2532 
   2533 	p = raidPtr->engine_thread;
   2534 
   2535 	for (r = 0; r < raidPtr->numRow; r++) {
   2536 		for (c = 0; c < raidPtr->numCol; c++) {
   2537 			printf("Closing vnode for row: %d col: %d\n", r, c);
   2538 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2539 			acd = raidPtr->Disks[r][c].auto_configured;
   2540 			rf_close_component(raidPtr, vp, acd);
   2541 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2542 			raidPtr->Disks[r][c].auto_configured = 0;
   2543 		}
   2544 	}
   2545 	for (r = 0; r < raidPtr->numSpare; r++) {
   2546 		printf("Closing vnode for spare: %d\n", r);
   2547 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2548 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2549 		rf_close_component(raidPtr, vp, acd);
   2550 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2551 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2552 	}
   2553 }
   2554 
   2555 
   2556 void
   2557 rf_ReconThread(req)
   2558 	struct rf_recon_req *req;
   2559 {
   2560 	int     s;
   2561 	RF_Raid_t *raidPtr;
   2562 
   2563 	s = splbio();
   2564 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2565 	raidPtr->recon_in_progress = 1;
   2566 
   2567 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2568 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2569 
   2570 	/* XXX get rid of this! we don't need it at all.. */
   2571 	RF_Free(req, sizeof(*req));
   2572 
   2573 	raidPtr->recon_in_progress = 0;
   2574 	splx(s);
   2575 
   2576 	/* That's all... */
   2577 	kthread_exit(0);        /* does not return */
   2578 }
   2579 
   2580 void
   2581 rf_RewriteParityThread(raidPtr)
   2582 	RF_Raid_t *raidPtr;
   2583 {
   2584 	int retcode;
   2585 	int s;
   2586 
   2587 	raidPtr->parity_rewrite_in_progress = 1;
   2588 	s = splbio();
   2589 	retcode = rf_RewriteParity(raidPtr);
   2590 	splx(s);
   2591 	if (retcode) {
   2592 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2593 	} else {
   2594 		/* set the clean bit!  If we shutdown correctly,
   2595 		   the clean bit on each component label will get
   2596 		   set */
   2597 		raidPtr->parity_good = RF_RAID_CLEAN;
   2598 	}
   2599 	raidPtr->parity_rewrite_in_progress = 0;
   2600 
   2601 	/* Anyone waiting for us to stop?  If so, inform them... */
   2602 	if (raidPtr->waitShutdown) {
   2603 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2604 	}
   2605 
   2606 	/* That's all... */
   2607 	kthread_exit(0);        /* does not return */
   2608 }
   2609 
   2610 
   2611 void
   2612 rf_CopybackThread(raidPtr)
   2613 	RF_Raid_t *raidPtr;
   2614 {
   2615 	int s;
   2616 
   2617 	raidPtr->copyback_in_progress = 1;
   2618 	s = splbio();
   2619 	rf_CopybackReconstructedData(raidPtr);
   2620 	splx(s);
   2621 	raidPtr->copyback_in_progress = 0;
   2622 
   2623 	/* That's all... */
   2624 	kthread_exit(0);        /* does not return */
   2625 }
   2626 
   2627 
   2628 void
   2629 rf_ReconstructInPlaceThread(req)
   2630 	struct rf_recon_req *req;
   2631 {
   2632 	int retcode;
   2633 	int s;
   2634 	RF_Raid_t *raidPtr;
   2635 
   2636 	s = splbio();
   2637 	raidPtr = req->raidPtr;
   2638 	raidPtr->recon_in_progress = 1;
   2639 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2640 	RF_Free(req, sizeof(*req));
   2641 	raidPtr->recon_in_progress = 0;
   2642 	splx(s);
   2643 
   2644 	/* That's all... */
   2645 	kthread_exit(0);        /* does not return */
   2646 }
   2647 
/*
 * Mount-root hook for RAID devices.  Intentionally does nothing.
 *
 *   dev - the device; not used
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
   2654 
   2655 
   2656 RF_AutoConfig_t *
   2657 rf_find_raid_components()
   2658 {
   2659 	struct devnametobdevmaj *dtobdm;
   2660 	struct vnode *vp;
   2661 	struct disklabel label;
   2662 	struct device *dv;
   2663 	char *cd_name;
   2664 	dev_t dev;
   2665 	int error;
   2666 	int i;
   2667 	int good_one;
   2668 	RF_ComponentLabel_t *clabel;
   2669 	RF_AutoConfig_t *ac_list;
   2670 	RF_AutoConfig_t *ac;
   2671 
   2672 
   2673 	/* initialize the AutoConfig list */
   2674 	ac_list = NULL;
   2675 
   2676 	/* we begin by trolling through *all* the devices on the system */
   2677 
   2678 	for (dv = alldevs.tqh_first; dv != NULL;
   2679 	     dv = dv->dv_list.tqe_next) {
   2680 
   2681 		/* we are only interested in disks... */
   2682 		if (dv->dv_class != DV_DISK)
   2683 			continue;
   2684 
   2685 		/* we don't care about floppies... */
   2686 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2687 			continue;
   2688 		}
   2689 		/* hdfd is the Atari/Hades floppy driver */
   2690 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2691 			continue;
   2692 		}
   2693 
   2694 		/* need to find the device_name_to_block_device_major stuff */
   2695 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2696 		dtobdm = dev_name2blk;
   2697 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2698 			dtobdm++;
   2699 		}
   2700 
   2701 		/* get a vnode for the raw partition of this disk */
   2702 
   2703 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2704 		if (bdevvp(dev, &vp))
   2705 			panic("RAID can't alloc vnode");
   2706 
   2707 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2708 
   2709 		if (error) {
   2710 			/* "Who cares."  Continue looking
   2711 			   for something that exists*/
   2712 			vput(vp);
   2713 			continue;
   2714 		}
   2715 
   2716 		/* Ok, the disk exists.  Go get the disklabel. */
   2717 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2718 				  FREAD, NOCRED, 0);
   2719 		if (error) {
   2720 			/*
   2721 			 * XXX can't happen - open() would
   2722 			 * have errored out (or faked up one)
   2723 			 */
   2724 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2725 			       dv->dv_xname, 'a' + RAW_PART, error);
   2726 		}
   2727 
   2728 		/* don't need this any more.  We'll allocate it again
   2729 		   a little later if we really do... */
   2730 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2731 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2732 		vput(vp);
   2733 
   2734 		for (i=0; i < label.d_npartitions; i++) {
   2735 			/* We only support partitions marked as RAID */
   2736 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2737 				continue;
   2738 
   2739 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2740 			if (bdevvp(dev, &vp))
   2741 				panic("RAID can't alloc vnode");
   2742 
   2743 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2744 			if (error) {
   2745 				/* Whatever... */
   2746 				vput(vp);
   2747 				continue;
   2748 			}
   2749 
   2750 			good_one = 0;
   2751 
   2752 			clabel = (RF_ComponentLabel_t *)
   2753 				malloc(sizeof(RF_ComponentLabel_t),
   2754 				       M_RAIDFRAME, M_NOWAIT);
   2755 			if (clabel == NULL) {
   2756 				/* XXX CLEANUP HERE */
   2757 				printf("RAID auto config: out of memory!\n");
   2758 				return(NULL); /* XXX probably should panic? */
   2759 			}
   2760 
   2761 			if (!raidread_component_label(dev, vp, clabel)) {
   2762 				/* Got the label.  Does it look reasonable? */
   2763 				if (rf_reasonable_label(clabel) &&
   2764 				    (clabel->partitionSize <=
   2765 				     label.d_partitions[i].p_size)) {
   2766 #if DEBUG
   2767 					printf("Component on: %s%c: %d\n",
   2768 					       dv->dv_xname, 'a'+i,
   2769 					       label.d_partitions[i].p_size);
   2770 					rf_print_component_label(clabel);
   2771 #endif
   2772 					/* if it's reasonable, add it,
   2773 					   else ignore it. */
   2774 					ac = (RF_AutoConfig_t *)
   2775 						malloc(sizeof(RF_AutoConfig_t),
   2776 						       M_RAIDFRAME,
   2777 						       M_NOWAIT);
   2778 					if (ac == NULL) {
   2779 						/* XXX should panic?? */
   2780 						return(NULL);
   2781 					}
   2782 
   2783 					sprintf(ac->devname, "%s%c",
   2784 						dv->dv_xname, 'a'+i);
   2785 					ac->dev = dev;
   2786 					ac->vp = vp;
   2787 					ac->clabel = clabel;
   2788 					ac->next = ac_list;
   2789 					ac_list = ac;
   2790 					good_one = 1;
   2791 				}
   2792 			}
   2793 			if (!good_one) {
   2794 				/* cleanup */
   2795 				free(clabel, M_RAIDFRAME);
   2796 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2797 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2798 				vput(vp);
   2799 			}
   2800 		}
   2801 	}
   2802 	return(ac_list);
   2803 }
   2804 
   2805 static int
   2806 rf_reasonable_label(clabel)
   2807 	RF_ComponentLabel_t *clabel;
   2808 {
   2809 
   2810 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2811 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2812 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2813 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2814 	    clabel->row >=0 &&
   2815 	    clabel->column >= 0 &&
   2816 	    clabel->num_rows > 0 &&
   2817 	    clabel->num_columns > 0 &&
   2818 	    clabel->row < clabel->num_rows &&
   2819 	    clabel->column < clabel->num_columns &&
   2820 	    clabel->blockSize > 0 &&
   2821 	    clabel->numBlocks > 0) {
   2822 		/* label looks reasonable enough... */
   2823 		return(1);
   2824 	}
   2825 	return(0);
   2826 }
   2827 
   2828 
   2829 void
   2830 rf_print_component_label(clabel)
   2831 	RF_ComponentLabel_t *clabel;
   2832 {
   2833 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2834 	       clabel->row, clabel->column,
   2835 	       clabel->num_rows, clabel->num_columns);
   2836 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2837 	       clabel->version, clabel->serial_number,
   2838 	       clabel->mod_counter);
   2839 	printf("   Clean: %s Status: %d\n",
   2840 	       clabel->clean ? "Yes" : "No", clabel->status );
   2841 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2842 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2843 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2844 	       (char) clabel->parityConfig, clabel->blockSize,
   2845 	       clabel->numBlocks);
   2846 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2847 	printf("   Contains root partition: %s\n",
   2848 	       clabel->root_partition ? "Yes" : "No" );
   2849 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2850 #if 0
   2851 	   printf("   Config order: %d\n", clabel->config_order);
   2852 #endif
   2853 
   2854 }
   2855 
   2856 RF_ConfigSet_t *
   2857 rf_create_auto_sets(ac_list)
   2858 	RF_AutoConfig_t *ac_list;
   2859 {
   2860 	RF_AutoConfig_t *ac;
   2861 	RF_ConfigSet_t *config_sets;
   2862 	RF_ConfigSet_t *cset;
   2863 	RF_AutoConfig_t *ac_next;
   2864 
   2865 
   2866 	config_sets = NULL;
   2867 
   2868 	/* Go through the AutoConfig list, and figure out which components
   2869 	   belong to what sets.  */
   2870 	ac = ac_list;
   2871 	while(ac!=NULL) {
   2872 		/* we're going to putz with ac->next, so save it here
   2873 		   for use at the end of the loop */
   2874 		ac_next = ac->next;
   2875 
   2876 		if (config_sets == NULL) {
   2877 			/* will need at least this one... */
   2878 			config_sets = (RF_ConfigSet_t *)
   2879 				malloc(sizeof(RF_ConfigSet_t),
   2880 				       M_RAIDFRAME, M_NOWAIT);
   2881 			if (config_sets == NULL) {
   2882 				panic("rf_create_auto_sets: No memory!\n");
   2883 			}
   2884 			/* this one is easy :) */
   2885 			config_sets->ac = ac;
   2886 			config_sets->next = NULL;
   2887 			config_sets->rootable = 0;
   2888 			ac->next = NULL;
   2889 		} else {
   2890 			/* which set does this component fit into? */
   2891 			cset = config_sets;
   2892 			while(cset!=NULL) {
   2893 				if (rf_does_it_fit(cset, ac)) {
   2894 					/* looks like it matches... */
   2895 					ac->next = cset->ac;
   2896 					cset->ac = ac;
   2897 					break;
   2898 				}
   2899 				cset = cset->next;
   2900 			}
   2901 			if (cset==NULL) {
   2902 				/* didn't find a match above... new set..*/
   2903 				cset = (RF_ConfigSet_t *)
   2904 					malloc(sizeof(RF_ConfigSet_t),
   2905 					       M_RAIDFRAME, M_NOWAIT);
   2906 				if (cset == NULL) {
   2907 					panic("rf_create_auto_sets: No memory!\n");
   2908 				}
   2909 				cset->ac = ac;
   2910 				ac->next = NULL;
   2911 				cset->next = config_sets;
   2912 				cset->rootable = 0;
   2913 				config_sets = cset;
   2914 			}
   2915 		}
   2916 		ac = ac_next;
   2917 	}
   2918 
   2919 
   2920 	return(config_sets);
   2921 }
   2922 
   2923 static int
   2924 rf_does_it_fit(cset, ac)
   2925 	RF_ConfigSet_t *cset;
   2926 	RF_AutoConfig_t *ac;
   2927 {
   2928 	RF_ComponentLabel_t *clabel1, *clabel2;
   2929 
   2930 	/* If this one matches the *first* one in the set, that's good
   2931 	   enough, since the other members of the set would have been
   2932 	   through here too... */
   2933 	/* note that we are not checking partitionSize here..
   2934 
   2935 	   Note that we are also not checking the mod_counters here.
   2936 	   If everything else matches execpt the mod_counter, that's
   2937 	   good enough for this test.  We will deal with the mod_counters
   2938 	   a little later in the autoconfiguration process.
   2939 
   2940 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2941 
   2942 	   The reason we don't check for this is that failed disks
   2943 	   will have lower modification counts.  If those disks are
   2944 	   not added to the set they used to belong to, then they will
   2945 	   form their own set, which may result in 2 different sets,
   2946 	   for example, competing to be configured at raid0, and
   2947 	   perhaps competing to be the root filesystem set.  If the
   2948 	   wrong ones get configured, or both attempt to become /,
   2949 	   weird behaviour and or serious lossage will occur.  Thus we
   2950 	   need to bring them into the fold here, and kick them out at
   2951 	   a later point.
   2952 
   2953 	*/
   2954 
   2955 	clabel1 = cset->ac->clabel;
   2956 	clabel2 = ac->clabel;
   2957 	if ((clabel1->version == clabel2->version) &&
   2958 	    (clabel1->serial_number == clabel2->serial_number) &&
   2959 	    (clabel1->num_rows == clabel2->num_rows) &&
   2960 	    (clabel1->num_columns == clabel2->num_columns) &&
   2961 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2962 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2963 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2964 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2965 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2966 	    (clabel1->blockSize == clabel2->blockSize) &&
   2967 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2968 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2969 	    (clabel1->root_partition == clabel2->root_partition) &&
   2970 	    (clabel1->last_unit == clabel2->last_unit) &&
   2971 	    (clabel1->config_order == clabel2->config_order)) {
   2972 		/* if it get's here, it almost *has* to be a match */
   2973 	} else {
   2974 		/* it's not consistent with somebody in the set..
   2975 		   punt */
   2976 		return(0);
   2977 	}
   2978 	/* all was fine.. it must fit... */
   2979 	return(1);
   2980 }
   2981 
   2982 int
   2983 rf_have_enough_components(cset)
   2984 	RF_ConfigSet_t *cset;
   2985 {
   2986 	RF_AutoConfig_t *ac;
   2987 	RF_AutoConfig_t *auto_config;
   2988 	RF_ComponentLabel_t *clabel;
   2989 	int r,c;
   2990 	int num_rows;
   2991 	int num_cols;
   2992 	int num_missing;
   2993 	int mod_counter;
   2994 	int mod_counter_found;
   2995 	int even_pair_failed;
   2996 	char parity_type;
   2997 
   2998 
   2999 	/* check to see that we have enough 'live' components
   3000 	   of this set.  If so, we can configure it if necessary */
   3001 
   3002 	num_rows = cset->ac->clabel->num_rows;
   3003 	num_cols = cset->ac->clabel->num_columns;
   3004 	parity_type = cset->ac->clabel->parityConfig;
   3005 
   3006 	/* XXX Check for duplicate components!?!?!? */
   3007 
   3008 	/* Determine what the mod_counter is supposed to be for this set. */
   3009 
   3010 	mod_counter_found = 0;
   3011 	mod_counter = 0;
   3012 	ac = cset->ac;
   3013 	while(ac!=NULL) {
   3014 		if (mod_counter_found==0) {
   3015 			mod_counter = ac->clabel->mod_counter;
   3016 			mod_counter_found = 1;
   3017 		} else {
   3018 			if (ac->clabel->mod_counter > mod_counter) {
   3019 				mod_counter = ac->clabel->mod_counter;
   3020 			}
   3021 		}
   3022 		ac = ac->next;
   3023 	}
   3024 
   3025 	num_missing = 0;
   3026 	auto_config = cset->ac;
   3027 
   3028 	for(r=0; r<num_rows; r++) {
   3029 		even_pair_failed = 0;
   3030 		for(c=0; c<num_cols; c++) {
   3031 			ac = auto_config;
   3032 			while(ac!=NULL) {
   3033 				if ((ac->clabel->row == r) &&
   3034 				    (ac->clabel->column == c) &&
   3035 				    (ac->clabel->mod_counter == mod_counter)) {
   3036 					/* it's this one... */
   3037 #if DEBUG
   3038 					printf("Found: %s at %d,%d\n",
   3039 					       ac->devname,r,c);
   3040 #endif
   3041 					break;
   3042 				}
   3043 				ac=ac->next;
   3044 			}
   3045 			if (ac==NULL) {
   3046 				/* Didn't find one here! */
   3047 				/* special case for RAID 1, especially
   3048 				   where there are more than 2
   3049 				   components (where RAIDframe treats
   3050 				   things a little differently :( ) */
   3051 				if (parity_type == '1') {
   3052 					if (c%2 == 0) { /* even component */
   3053 						even_pair_failed = 1;
   3054 					} else { /* odd component.  If
   3055                                                     we're failed, and
   3056                                                     so is the even
   3057                                                     component, it's
   3058                                                     "Good Night, Charlie" */
   3059 						if (even_pair_failed == 1) {
   3060 							return(0);
   3061 						}
   3062 					}
   3063 				} else {
   3064 					/* normal accounting */
   3065 					num_missing++;
   3066 				}
   3067 			}
   3068 			if ((parity_type == '1') && (c%2 == 1)) {
   3069 				/* Just did an even component, and we didn't
   3070 				   bail.. reset the even_pair_failed flag,
   3071 				   and go on to the next component.... */
   3072 				even_pair_failed = 0;
   3073 			}
   3074 		}
   3075 	}
   3076 
   3077 	clabel = cset->ac->clabel;
   3078 
   3079 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3080 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3081 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3082 		/* XXX this needs to be made *much* more general */
   3083 		/* Too many failures */
   3084 		return(0);
   3085 	}
   3086 	/* otherwise, all is well, and we've got enough to take a kick
   3087 	   at autoconfiguring this set */
   3088 	return(1);
   3089 }
   3090 
   3091 void
   3092 rf_create_configuration(ac,config,raidPtr)
   3093 	RF_AutoConfig_t *ac;
   3094 	RF_Config_t *config;
   3095 	RF_Raid_t *raidPtr;
   3096 {
   3097 	RF_ComponentLabel_t *clabel;
   3098 	int i;
   3099 
   3100 	clabel = ac->clabel;
   3101 
   3102 	/* 1. Fill in the common stuff */
   3103 	config->numRow = clabel->num_rows;
   3104 	config->numCol = clabel->num_columns;
   3105 	config->numSpare = 0; /* XXX should this be set here? */
   3106 	config->sectPerSU = clabel->sectPerSU;
   3107 	config->SUsPerPU = clabel->SUsPerPU;
   3108 	config->SUsPerRU = clabel->SUsPerRU;
   3109 	config->parityConfig = clabel->parityConfig;
   3110 	/* XXX... */
   3111 	strcpy(config->diskQueueType,"fifo");
   3112 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3113 	config->layoutSpecificSize = 0; /* XXX ?? */
   3114 
   3115 	while(ac!=NULL) {
   3116 		/* row/col values will be in range due to the checks
   3117 		   in reasonable_label() */
   3118 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3119 		       ac->devname);
   3120 		ac = ac->next;
   3121 	}
   3122 
   3123 	for(i=0;i<RF_MAXDBGV;i++) {
   3124 		config->debugVars[i][0] = NULL;
   3125 	}
   3126 }
   3127 
   3128 int
   3129 rf_set_autoconfig(raidPtr, new_value)
   3130 	RF_Raid_t *raidPtr;
   3131 	int new_value;
   3132 {
   3133 	RF_ComponentLabel_t clabel;
   3134 	struct vnode *vp;
   3135 	dev_t dev;
   3136 	int row, column;
   3137 
   3138 	raidPtr->autoconfigure = new_value;
   3139 	for(row=0; row<raidPtr->numRow; row++) {
   3140 		for(column=0; column<raidPtr->numCol; column++) {
   3141 			if (raidPtr->Disks[row][column].status ==
   3142 			    rf_ds_optimal) {
   3143 				dev = raidPtr->Disks[row][column].dev;
   3144 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3145 				raidread_component_label(dev, vp, &clabel);
   3146 				clabel.autoconfigure = new_value;
   3147 				raidwrite_component_label(dev, vp, &clabel);
   3148 			}
   3149 		}
   3150 	}
   3151 	return(new_value);
   3152 }
   3153 
   3154 int
   3155 rf_set_rootpartition(raidPtr, new_value)
   3156 	RF_Raid_t *raidPtr;
   3157 	int new_value;
   3158 {
   3159 	RF_ComponentLabel_t clabel;
   3160 	struct vnode *vp;
   3161 	dev_t dev;
   3162 	int row, column;
   3163 
   3164 	raidPtr->root_partition = new_value;
   3165 	for(row=0; row<raidPtr->numRow; row++) {
   3166 		for(column=0; column<raidPtr->numCol; column++) {
   3167 			if (raidPtr->Disks[row][column].status ==
   3168 			    rf_ds_optimal) {
   3169 				dev = raidPtr->Disks[row][column].dev;
   3170 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3171 				raidread_component_label(dev, vp, &clabel);
   3172 				clabel.root_partition = new_value;
   3173 				raidwrite_component_label(dev, vp, &clabel);
   3174 			}
   3175 		}
   3176 	}
   3177 	return(new_value);
   3178 }
   3179 
   3180 void
   3181 rf_release_all_vps(cset)
   3182 	RF_ConfigSet_t *cset;
   3183 {
   3184 	RF_AutoConfig_t *ac;
   3185 
   3186 	ac = cset->ac;
   3187 	while(ac!=NULL) {
   3188 		/* Close the vp, and give it back */
   3189 		if (ac->vp) {
   3190 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3191 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3192 			vput(ac->vp);
   3193 			ac->vp = NULL;
   3194 		}
   3195 		ac = ac->next;
   3196 	}
   3197 }
   3198 
   3199 
   3200 void
   3201 rf_cleanup_config_set(cset)
   3202 	RF_ConfigSet_t *cset;
   3203 {
   3204 	RF_AutoConfig_t *ac;
   3205 	RF_AutoConfig_t *next_ac;
   3206 
   3207 	ac = cset->ac;
   3208 	while(ac!=NULL) {
   3209 		next_ac = ac->next;
   3210 		/* nuke the label */
   3211 		free(ac->clabel, M_RAIDFRAME);
   3212 		/* cleanup the config structure */
   3213 		free(ac, M_RAIDFRAME);
   3214 		/* "next.." */
   3215 		ac = next_ac;
   3216 	}
   3217 	/* and, finally, nuke the config set */
   3218 	free(cset, M_RAIDFRAME);
   3219 }
   3220 
   3221 
   3222 void
   3223 raid_init_component_label(raidPtr, clabel)
   3224 	RF_Raid_t *raidPtr;
   3225 	RF_ComponentLabel_t *clabel;
   3226 {
   3227 	/* current version number */
   3228 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3229 	clabel->serial_number = raidPtr->serial_number;
   3230 	clabel->mod_counter = raidPtr->mod_counter;
   3231 	clabel->num_rows = raidPtr->numRow;
   3232 	clabel->num_columns = raidPtr->numCol;
   3233 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3234 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3235 
   3236 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3237 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3238 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3239 
   3240 	clabel->blockSize = raidPtr->bytesPerSector;
   3241 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3242 
   3243 	/* XXX not portable */
   3244 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3245 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3246 	clabel->autoconfigure = raidPtr->autoconfigure;
   3247 	clabel->root_partition = raidPtr->root_partition;
   3248 	clabel->last_unit = raidPtr->raidid;
   3249 	clabel->config_order = raidPtr->config_order;
   3250 }
   3251 
   3252 int
   3253 rf_auto_config_set(cset,unit)
   3254 	RF_ConfigSet_t *cset;
   3255 	int *unit;
   3256 {
   3257 	RF_Raid_t *raidPtr;
   3258 	RF_Config_t *config;
   3259 	int raidID;
   3260 	int retcode;
   3261 
   3262 	printf("RAID autoconfigure\n");
   3263 
   3264 	retcode = 0;
   3265 	*unit = -1;
   3266 
   3267 	/* 1. Create a config structure */
   3268 
   3269 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3270 				       M_RAIDFRAME,
   3271 				       M_NOWAIT);
   3272 	if (config==NULL) {
   3273 		printf("Out of mem!?!?\n");
   3274 				/* XXX do something more intelligent here. */
   3275 		return(1);
   3276 	}
   3277 
   3278 	memset(config, 0, sizeof(RF_Config_t));
   3279 
   3280 	/* XXX raidID needs to be set correctly.. */
   3281 
   3282 	/*
   3283 	   2. Figure out what RAID ID this one is supposed to live at
   3284 	   See if we can get the same RAID dev that it was configured
   3285 	   on last time..
   3286 	*/
   3287 
   3288 	raidID = cset->ac->clabel->last_unit;
   3289 	if ((raidID < 0) || (raidID >= numraid)) {
   3290 		/* let's not wander off into lala land. */
   3291 		raidID = numraid - 1;
   3292 	}
   3293 	if (raidPtrs[raidID]->valid != 0) {
   3294 
   3295 		/*
   3296 		   Nope... Go looking for an alternative...
   3297 		   Start high so we don't immediately use raid0 if that's
   3298 		   not taken.
   3299 		*/
   3300 
   3301 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3302 			if (raidPtrs[raidID]->valid == 0) {
   3303 				/* can use this one! */
   3304 				break;
   3305 			}
   3306 		}
   3307 	}
   3308 
   3309 	if (raidID < 0) {
   3310 		/* punt... */
   3311 		printf("Unable to auto configure this set!\n");
   3312 		printf("(Out of RAID devs!)\n");
   3313 		return(1);
   3314 	}
   3315 	printf("Configuring raid%d:\n",raidID);
   3316 	raidPtr = raidPtrs[raidID];
   3317 
   3318 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3319 	raidPtr->raidid = raidID;
   3320 	raidPtr->openings = RAIDOUTSTANDING;
   3321 
   3322 	/* 3. Build the configuration structure */
   3323 	rf_create_configuration(cset->ac, config, raidPtr);
   3324 
   3325 	/* 4. Do the configuration */
   3326 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3327 
   3328 	if (retcode == 0) {
   3329 
   3330 		raidinit(raidPtrs[raidID]);
   3331 
   3332 		rf_markalldirty(raidPtrs[raidID]);
   3333 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3334 		if (cset->ac->clabel->root_partition==1) {
   3335 			/* everything configured just fine.  Make a note
   3336 			   that this set is eligible to be root. */
   3337 			cset->rootable = 1;
   3338 			/* XXX do this here? */
   3339 			raidPtrs[raidID]->root_partition = 1;
   3340 		}
   3341 	}
   3342 
   3343 	/* 5. Cleanup */
   3344 	free(config, M_RAIDFRAME);
   3345 
   3346 	*unit = raidID;
   3347 	return(retcode);
   3348 }
   3349 
   3350 void
   3351 rf_disk_unbusy(desc)
   3352 	RF_RaidAccessDesc_t *desc;
   3353 {
   3354 	struct buf *bp;
   3355 
   3356 	bp = (struct buf *)desc->bp;
   3357 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3358 			    (bp->b_bcount - bp->b_resid));
   3359 }
   3360