Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.147
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.147 2002/11/15 03:00:12 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.147 2002/11/15 03:00:12 oster Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_threadstuff.h"
    155 
    156 #ifdef DEBUG
    157 int     rf_kdebug_level = 0;
    158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    159 #else				/* DEBUG */
    160 #define db1_printf(a) { }
    161 #endif				/* DEBUG */
    162 
    163 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    164 
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    166 
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    170 						 * installation process */
    171 
    172 /* prototypes */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static void raidinit(RF_Raid_t *);
    180 
    181 void raidattach(int);
    182 
    183 dev_type_open(raidopen);
    184 dev_type_close(raidclose);
    185 dev_type_read(raidread);
    186 dev_type_write(raidwrite);
    187 dev_type_ioctl(raidioctl);
    188 dev_type_strategy(raidstrategy);
    189 dev_type_dump(raiddump);
    190 dev_type_size(raidsize);
    191 
    /*
     * Block-device switch entry for the raid driver.  It lists the driver's
     * open, close, strategy, ioctl, dump and size entry points (declared
     * with the dev_type_*() macros above) and tags the device as D_DISK.
     */
    192 const struct bdevsw raid_bdevsw = {
    193 	raidopen, raidclose, raidstrategy, raidioctl,
    194 	raiddump, raidsize, D_DISK
    195 };
    196 
    /*
     * Character-device switch entry for the raid driver.  It lists the
     * driver's open, close, read, write and ioctl entry points; the
     * remaining slots are filled with the no*() placeholders, and the
     * device is tagged as D_DISK.
     */
    197 const struct cdevsw raid_cdevsw = {
    198 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    199 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    200 };
    201 
    202 /*
    203  * Pilfered from ccd.c
    204  */
    205 
    /*
     * Wrapper around a struct buf used for component I/O.  raidattach()
     * sizes the raidframe_cbufpool pool items to sizeof(struct raidbuf).
     * NOTE(review): rf_buf presumably has to be first so that a struct buf
     * pointer can be converted back to its enclosing raidbuf -- confirm
     * against the I/O completion code.
     */
    206 struct raidbuf {
    207 	struct buf rf_buf;	/* the new I/O buf; must stay the first member */
    208 	struct buf *rf_obp;	/* pointer to the original I/O buf */
    209 	RF_DiskQueueData_t *req;/* the request that this buf was part of */
    210 };
    211 
    212 /* component buffer pool */
    213 struct pool raidframe_cbufpool;
    214 
    215 /* XXX Not sure if the following should be replacing the raidPtrs above,
    216    or if it should be used in conjunction with that...
    217 */
    218 
    /*
     * Per-unit software state.  raidattach() allocates an array of these
     * (one per requested unit) and the entry points index it by unit number.
     */
    219 struct raid_softc {
    220 	int     sc_flags;	/* RAIDF_* state flags (defined below) */
    221 	int     sc_cflags;	/* configuration flags */
    222 	size_t  sc_size;        /* size of the raid device */
    223 	char    sc_xname[20];	/* XXX external name */
    224 	struct disk sc_dkdev;	/* generic disk device info (label, open masks) */
    225 	struct bufq_state buf_queue;	/* queue of bufs handed to raidstrategy() */
    226 };
    227 /* sc_flags */
    228 #define RAIDF_INITED	0x01	/* unit has been initialized */
    229 #define RAIDF_WLABEL	0x02	/* label area is writable */
    230 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    231 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    232 #define RAIDF_LOCKED	0x80	/* unit is locked */
    233 
    234 #define	raidunit(x)	DISKUNIT(x)
    235 int numraid = 0;
    236 
    237 /*
    238  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    239  * Be aware that large numbers can allow the driver to consume a lot of
    240  * kernel memory, especially on writes, and in degraded mode reads.
    241  *
    242  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    243  * a single 64K write will typically require 64K for the old data,
    244  * 64K for the old parity, and 64K for the new parity, for a total
    245  * of 192K (if the parity buffer is not re-used immediately).
    246  * Even if it is used immediately, that's still 128K, which when multiplied
    247  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    248  *
    249  * Now in degraded mode, for example, a 64K read on the above setup may
    250  * require data reconstruction, which will require *all* of the 4 remaining
    251  * disks to participate -- 4 * 32K/disk == 128K again.
    252  */
    253 
    254 #ifndef RAIDOUTSTANDING
    255 #define RAIDOUTSTANDING   6
    256 #endif
    257 
    258 #define RAIDLABELDEV(dev)	\
    259 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    260 
    261 /* declared here, and made public, for the benefit of KVM stuff.. */
    262 struct raid_softc *raid_softc;
    263 
    264 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    265 				     struct disklabel *);
    266 static void raidgetdisklabel(dev_t);
    267 static void raidmakedisklabel(struct raid_softc *);
    268 
    269 static int raidlock(struct raid_softc *);
    270 static void raidunlock(struct raid_softc *);
    271 
    272 static void rf_markalldirty(RF_Raid_t *);
    273 
    274 struct device *raidrootdev;
    275 
    276 void rf_ReconThread(struct rf_recon_req *);
    277 /* XXX what I want is: */
    278 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    280 void rf_CopybackThread(RF_Raid_t *raidPtr);
    281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    282 int rf_autoconfig(struct device *self);
    283 void rf_buildroothack(RF_ConfigSet_t *);
    284 
    285 RF_AutoConfig_t *rf_find_raid_components(void);
    286 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    287 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    288 static int rf_reasonable_label(RF_ComponentLabel_t *);
    289 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    290 int rf_set_autoconfig(RF_Raid_t *, int);
    291 int rf_set_rootpartition(RF_Raid_t *, int);
    292 void rf_release_all_vps(RF_ConfigSet_t *);
    293 void rf_cleanup_config_set(RF_ConfigSet_t *);
    294 int rf_have_enough_components(RF_ConfigSet_t *);
    295 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    296 
    297 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    298 				  allow autoconfig to take place.
    299 			          Note that this is overridden by having
    300 			          RAID_AUTOCONFIG as an option in the
    301 			          kernel config file.  */
    302 
    303 void
    304 raidattach(num)
    305 	int     num;
    306 {
    307 	int raidID;
    308 	int i, rc;
    309 
    310 #ifdef DEBUG
    311 	printf("raidattach: Asked for %d units\n", num);
    312 #endif
    313 
    314 	if (num <= 0) {
    315 #ifdef DIAGNOSTIC
    316 		panic("raidattach: count <= 0");
    317 #endif
    318 		return;
    319 	}
    320 	/* This is where all the initialization stuff gets done. */
    321 
    322 	numraid = num;
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!");
    329 	}
    330 
    331 	/* Initialize the component buffer pool. */
    332 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    333 	    0, 0, "raidpl", NULL);
    334 
    335 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    336 	if (rc) {
    337 		RF_PANIC();
    338 	}
    339 
    340 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    341 
    342 	for (i = 0; i < num; i++)
    343 		raidPtrs[i] = NULL;
    344 	rc = rf_BootRaidframe();
    345 	if (rc == 0)
    346 		printf("Kernelized RAIDframe activated\n");
    347 	else
    348 		panic("Serious error booting RAID!!");
    349 
    350 	/* put together some datastructures like the CCD device does.. This
    351 	 * lets us lock the device and what-not when it gets opened. */
    352 
    353 	raid_softc = (struct raid_softc *)
    354 		malloc(num * sizeof(struct raid_softc),
    355 		       M_RAIDFRAME, M_NOWAIT);
    356 	if (raid_softc == NULL) {
    357 		printf("WARNING: no memory for RAIDframe driver\n");
    358 		return;
    359 	}
    360 
    361 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    362 
    363 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    364 					      M_RAIDFRAME, M_NOWAIT);
    365 	if (raidrootdev == NULL) {
    366 		panic("No memory for RAIDframe driver!!?!?!");
    367 	}
    368 
    369 	for (raidID = 0; raidID < num; raidID++) {
    370 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    371 
    372 		raidrootdev[raidID].dv_class  = DV_DISK;
    373 		raidrootdev[raidID].dv_cfdata = NULL;
    374 		raidrootdev[raidID].dv_unit   = raidID;
    375 		raidrootdev[raidID].dv_parent = NULL;
    376 		raidrootdev[raidID].dv_flags  = 0;
    377 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    378 
    379 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    380 			  (RF_Raid_t *));
    381 		if (raidPtrs[raidID] == NULL) {
    382 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    383 			numraid = raidID;
    384 			return;
    385 		}
    386 	}
    387 
    388 #ifdef RAID_AUTOCONFIG
    389 	raidautoconfig = 1;
    390 #endif
    391 
    392 	/*
    393 	 * Register a finalizer which will be used to auto-config RAID
    394 	 * sets once all real hardware devices have been found.
    395 	 */
    396 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    397 		printf("WARNING: unable to register RAIDframe finalizer\n");
    398 }
    399 
    400 int
    401 rf_autoconfig(struct device *self)
    402 {
    403 	RF_AutoConfig_t *ac_list;
    404 	RF_ConfigSet_t *config_sets;
    405 
    406 	if (raidautoconfig == 0)
    407 		return (0);
    408 
    409 	/* XXX This code can only be run once. */
    410 	raidautoconfig = 0;
    411 
    412 	/* 1. locate all RAID components on the system */
    413 #ifdef DEBUG
    414 	printf("Searching for RAID components...\n");
    415 #endif
    416 	ac_list = rf_find_raid_components();
    417 
    418 	/* 2. Sort them into their respective sets. */
    419 	config_sets = rf_create_auto_sets(ac_list);
    420 
    421 	/*
    422 	 * 3. Evaluate each set andconfigure the valid ones.
    423 	 * This gets done in rf_buildroothack().
    424 	 */
    425 	rf_buildroothack(config_sets);
    426 
    427 	return (1);
    428 }
    429 
    430 void
    431 rf_buildroothack(RF_ConfigSet_t *config_sets)
    432 {
    433 	RF_ConfigSet_t *cset;
    434 	RF_ConfigSet_t *next_cset;
    435 	int retcode;
    436 	int raidID;
    437 	int rootID;
    438 	int num_root;
    439 
    440 	rootID = 0;
    441 	num_root = 0;
    442 	cset = config_sets;
    443 	while(cset != NULL ) {
    444 		next_cset = cset->next;
    445 		if (rf_have_enough_components(cset) &&
    446 		    cset->ac->clabel->autoconfigure==1) {
    447 			retcode = rf_auto_config_set(cset,&raidID);
    448 			if (!retcode) {
    449 				if (cset->rootable) {
    450 					rootID = raidID;
    451 					num_root++;
    452 				}
    453 			} else {
    454 				/* The autoconfig didn't work :( */
    455 #if DEBUG
    456 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    457 #endif
    458 				rf_release_all_vps(cset);
    459 			}
    460 		} else {
    461 			/* we're not autoconfiguring this set...
    462 			   release the associated resources */
    463 			rf_release_all_vps(cset);
    464 		}
    465 		/* cleanup */
    466 		rf_cleanup_config_set(cset);
    467 		cset = next_cset;
    468 	}
    469 
    470 	/* we found something bootable... */
    471 
    472 	if (num_root == 1) {
    473 		booted_device = &raidrootdev[rootID];
    474 	} else if (num_root > 1) {
    475 		/* we can't guess.. require the user to answer... */
    476 		boothowto |= RB_ASKNAME;
    477 	}
    478 }
    479 
    480 
    481 int
    482 raidsize(dev)
    483 	dev_t   dev;
    484 {
    485 	struct raid_softc *rs;
    486 	struct disklabel *lp;
    487 	int     part, unit, omask, size;
    488 
    489 	unit = raidunit(dev);
    490 	if (unit >= numraid)
    491 		return (-1);
    492 	rs = &raid_softc[unit];
    493 
    494 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    495 		return (-1);
    496 
    497 	part = DISKPART(dev);
    498 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    499 	lp = rs->sc_dkdev.dk_label;
    500 
    501 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    502 		return (-1);
    503 
    504 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    505 		size = -1;
    506 	else
    507 		size = lp->d_partitions[part].p_size *
    508 		    (lp->d_secsize / DEV_BSIZE);
    509 
    510 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    511 		return (-1);
    512 
    513 	return (size);
    514 
    515 }
    516 
    517 int
    518 raiddump(dev, blkno, va, size)
    519 	dev_t   dev;
    520 	daddr_t blkno;
    521 	caddr_t va;
    522 	size_t  size;
    523 {
    524 	/* Not implemented. */
    525 	return ENXIO;
    526 }
    527 /* ARGSUSED */
    528 int
    529 raidopen(dev, flags, fmt, p)
    530 	dev_t   dev;
    531 	int     flags, fmt;
    532 	struct proc *p;
    533 {
    534 	int     unit = raidunit(dev);
    535 	struct raid_softc *rs;
    536 	struct disklabel *lp;
    537 	int     part, pmask;
    538 	int     error = 0;
    539 
    540 	if (unit >= numraid)
    541 		return (ENXIO);
    542 	rs = &raid_softc[unit];
    543 
    544 	if ((error = raidlock(rs)) != 0)
    545 		return (error);
    546 	lp = rs->sc_dkdev.dk_label;
    547 
    548 	part = DISKPART(dev);
    549 	pmask = (1 << part);
    550 
    551 	if ((rs->sc_flags & RAIDF_INITED) &&
    552 	    (rs->sc_dkdev.dk_openmask == 0))
    553 		raidgetdisklabel(dev);
    554 
    555 	/* make sure that this partition exists */
    556 
    557 	if (part != RAW_PART) {
    558 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    559 		    ((part >= lp->d_npartitions) ||
    560 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    561 			error = ENXIO;
    562 			raidunlock(rs);
    563 			return (error);
    564 		}
    565 	}
    566 	/* Prevent this unit from being unconfigured while open. */
    567 	switch (fmt) {
    568 	case S_IFCHR:
    569 		rs->sc_dkdev.dk_copenmask |= pmask;
    570 		break;
    571 
    572 	case S_IFBLK:
    573 		rs->sc_dkdev.dk_bopenmask |= pmask;
    574 		break;
    575 	}
    576 
    577 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    578 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    579 		/* First one... mark things as dirty... Note that we *MUST*
    580 		 have done a configure before this.  I DO NOT WANT TO BE
    581 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    582 		 THAT THEY BELONG TOGETHER!!!!! */
    583 		/* XXX should check to see if we're only open for reading
    584 		   here... If so, we needn't do this, but then need some
    585 		   other way of keeping track of what's happened.. */
    586 
    587 		rf_markalldirty( raidPtrs[unit] );
    588 	}
    589 
    590 
    591 	rs->sc_dkdev.dk_openmask =
    592 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    593 
    594 	raidunlock(rs);
    595 
    596 	return (error);
    597 
    598 
    599 }
    600 /* ARGSUSED */
    601 int
    602 raidclose(dev, flags, fmt, p)
    603 	dev_t   dev;
    604 	int     flags, fmt;
    605 	struct proc *p;
    606 {
    607 	int     unit = raidunit(dev);
    608 	struct raid_softc *rs;
    609 	int     error = 0;
    610 	int     part;
    611 
    612 	if (unit >= numraid)
    613 		return (ENXIO);
    614 	rs = &raid_softc[unit];
    615 
    616 	if ((error = raidlock(rs)) != 0)
    617 		return (error);
    618 
    619 	part = DISKPART(dev);
    620 
    621 	/* ...that much closer to allowing unconfiguration... */
    622 	switch (fmt) {
    623 	case S_IFCHR:
    624 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    625 		break;
    626 
    627 	case S_IFBLK:
    628 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    629 		break;
    630 	}
    631 	rs->sc_dkdev.dk_openmask =
    632 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    633 
    634 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    635 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    636 		/* Last one... device is not unconfigured yet.
    637 		   Device shutdown has taken care of setting the
    638 		   clean bits if RAIDF_INITED is not set
    639 		   mark things as clean... */
    640 
    641 		rf_update_component_labels(raidPtrs[unit],
    642 						 RF_FINAL_COMPONENT_UPDATE);
    643 		if (doing_shutdown) {
    644 			/* last one, and we're going down, so
    645 			   lights out for this RAID set too. */
    646 			error = rf_Shutdown(raidPtrs[unit]);
    647 
    648 			/* It's no longer initialized... */
    649 			rs->sc_flags &= ~RAIDF_INITED;
    650 
    651 			/* Detach the disk. */
    652 			disk_detach(&rs->sc_dkdev);
    653 		}
    654 	}
    655 
    656 	raidunlock(rs);
    657 	return (0);
    658 
    659 }
    660 
    661 void
    662 raidstrategy(bp)
    663 	struct buf *bp;
    664 {
    665 	int s;
    666 
    667 	unsigned int raidID = raidunit(bp->b_dev);
    668 	RF_Raid_t *raidPtr;
    669 	struct raid_softc *rs = &raid_softc[raidID];
    670 	struct disklabel *lp;
    671 	int     wlabel;
    672 
    673 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    674 		bp->b_error = ENXIO;
    675 		bp->b_flags |= B_ERROR;
    676 		bp->b_resid = bp->b_bcount;
    677 		biodone(bp);
    678 		return;
    679 	}
    680 	if (raidID >= numraid || !raidPtrs[raidID]) {
    681 		bp->b_error = ENODEV;
    682 		bp->b_flags |= B_ERROR;
    683 		bp->b_resid = bp->b_bcount;
    684 		biodone(bp);
    685 		return;
    686 	}
    687 	raidPtr = raidPtrs[raidID];
    688 	if (!raidPtr->valid) {
    689 		bp->b_error = ENODEV;
    690 		bp->b_flags |= B_ERROR;
    691 		bp->b_resid = bp->b_bcount;
    692 		biodone(bp);
    693 		return;
    694 	}
    695 	if (bp->b_bcount == 0) {
    696 		db1_printf(("b_bcount is zero..\n"));
    697 		biodone(bp);
    698 		return;
    699 	}
    700 	lp = rs->sc_dkdev.dk_label;
    701 
    702 	/*
    703 	 * Do bounds checking and adjust transfer.  If there's an
    704 	 * error, the bounds check will flag that for us.
    705 	 */
    706 
    707 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    708 	if (DISKPART(bp->b_dev) != RAW_PART)
    709 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    710 			db1_printf(("Bounds check failed!!:%d %d\n",
    711 				(int) bp->b_blkno, (int) wlabel));
    712 			biodone(bp);
    713 			return;
    714 		}
    715 	s = splbio();
    716 
    717 	bp->b_resid = 0;
    718 
    719 	/* stuff it onto our queue */
    720 	BUFQ_PUT(&rs->buf_queue, bp);
    721 
    722 	raidstart(raidPtrs[raidID]);
    723 
    724 	splx(s);
    725 }
    726 /* ARGSUSED */
    727 int
    728 raidread(dev, uio, flags)
    729 	dev_t   dev;
    730 	struct uio *uio;
    731 	int     flags;
    732 {
    733 	int     unit = raidunit(dev);
    734 	struct raid_softc *rs;
    735 	int     part;
    736 
    737 	if (unit >= numraid)
    738 		return (ENXIO);
    739 	rs = &raid_softc[unit];
    740 
    741 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    742 		return (ENXIO);
    743 	part = DISKPART(dev);
    744 
    745 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    746 
    747 }
    748 /* ARGSUSED */
    749 int
    750 raidwrite(dev, uio, flags)
    751 	dev_t   dev;
    752 	struct uio *uio;
    753 	int     flags;
    754 {
    755 	int     unit = raidunit(dev);
    756 	struct raid_softc *rs;
    757 
    758 	if (unit >= numraid)
    759 		return (ENXIO);
    760 	rs = &raid_softc[unit];
    761 
    762 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    763 		return (ENXIO);
    764 
    765 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    766 
    767 }
    768 
    769 int
    770 raidioctl(dev, cmd, data, flag, p)
    771 	dev_t   dev;
    772 	u_long  cmd;
    773 	caddr_t data;
    774 	int     flag;
    775 	struct proc *p;
    776 {
    777 	int     unit = raidunit(dev);
    778 	int     error = 0;
    779 	int     part, pmask;
    780 	struct raid_softc *rs;
    781 	RF_Config_t *k_cfg, *u_cfg;
    782 	RF_Raid_t *raidPtr;
    783 	RF_RaidDisk_t *diskPtr;
    784 	RF_AccTotals_t *totals;
    785 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    786 	u_char *specific_buf;
    787 	int retcode = 0;
    788 	int row;
    789 	int column;
    790 	int raidid;
    791 	struct rf_recon_req *rrcopy, *rr;
    792 	RF_ComponentLabel_t *clabel;
    793 	RF_ComponentLabel_t ci_label;
    794 	RF_ComponentLabel_t **clabel_ptr;
    795 	RF_SingleComponent_t *sparePtr,*componentPtr;
    796 	RF_SingleComponent_t hot_spare;
    797 	RF_SingleComponent_t component;
    798 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    799 	int i, j, d;
    800 #ifdef __HAVE_OLD_DISKLABEL
    801 	struct disklabel newlabel;
    802 #endif
    803 
    804 	if (unit >= numraid)
    805 		return (ENXIO);
    806 	rs = &raid_softc[unit];
    807 	raidPtr = raidPtrs[unit];
    808 
    809 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    810 		(int) DISKPART(dev), (int) unit, (int) cmd));
    811 
    812 	/* Must be open for writes for these commands... */
    813 	switch (cmd) {
    814 	case DIOCSDINFO:
    815 	case DIOCWDINFO:
    816 #ifdef __HAVE_OLD_DISKLABEL
    817 	case ODIOCWDINFO:
    818 	case ODIOCSDINFO:
    819 #endif
    820 	case DIOCWLABEL:
    821 		if ((flag & FWRITE) == 0)
    822 			return (EBADF);
    823 	}
    824 
    825 	/* Must be initialized for these... */
    826 	switch (cmd) {
    827 	case DIOCGDINFO:
    828 	case DIOCSDINFO:
    829 	case DIOCWDINFO:
    830 #ifdef __HAVE_OLD_DISKLABEL
    831 	case ODIOCGDINFO:
    832 	case ODIOCWDINFO:
    833 	case ODIOCSDINFO:
    834 	case ODIOCGDEFLABEL:
    835 #endif
    836 	case DIOCGPART:
    837 	case DIOCWLABEL:
    838 	case DIOCGDEFLABEL:
    839 	case RAIDFRAME_SHUTDOWN:
    840 	case RAIDFRAME_REWRITEPARITY:
    841 	case RAIDFRAME_GET_INFO:
    842 	case RAIDFRAME_RESET_ACCTOTALS:
    843 	case RAIDFRAME_GET_ACCTOTALS:
    844 	case RAIDFRAME_KEEP_ACCTOTALS:
    845 	case RAIDFRAME_GET_SIZE:
    846 	case RAIDFRAME_FAIL_DISK:
    847 	case RAIDFRAME_COPYBACK:
    848 	case RAIDFRAME_CHECK_RECON_STATUS:
    849 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    850 	case RAIDFRAME_GET_COMPONENT_LABEL:
    851 	case RAIDFRAME_SET_COMPONENT_LABEL:
    852 	case RAIDFRAME_ADD_HOT_SPARE:
    853 	case RAIDFRAME_REMOVE_HOT_SPARE:
    854 	case RAIDFRAME_INIT_LABELS:
    855 	case RAIDFRAME_REBUILD_IN_PLACE:
    856 	case RAIDFRAME_CHECK_PARITY:
    857 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    858 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    859 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    860 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    861 	case RAIDFRAME_SET_AUTOCONFIG:
    862 	case RAIDFRAME_SET_ROOT:
    863 	case RAIDFRAME_DELETE_COMPONENT:
    864 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    865 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    866 			return (ENXIO);
    867 	}
    868 
    869 	switch (cmd) {
    870 
    871 		/* configure the system */
    872 	case RAIDFRAME_CONFIGURE:
    873 
    874 		if (raidPtr->valid) {
    875 			/* There is a valid RAID set running on this unit! */
    876 			printf("raid%d: Device already configured!\n",unit);
    877 			return(EINVAL);
    878 		}
    879 
    880 		/* copy-in the configuration information */
    881 		/* data points to a pointer to the configuration structure */
    882 
    883 		u_cfg = *((RF_Config_t **) data);
    884 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    885 		if (k_cfg == NULL) {
    886 			return (ENOMEM);
    887 		}
    888 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    889 		    sizeof(RF_Config_t));
    890 		if (retcode) {
    891 			RF_Free(k_cfg, sizeof(RF_Config_t));
    892 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    893 				retcode));
    894 			return (retcode);
    895 		}
    896 		/* allocate a buffer for the layout-specific data, and copy it
    897 		 * in */
    898 		if (k_cfg->layoutSpecificSize) {
    899 			if (k_cfg->layoutSpecificSize > 10000) {
    900 				/* sanity check */
    901 				RF_Free(k_cfg, sizeof(RF_Config_t));
    902 				return (EINVAL);
    903 			}
    904 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    905 			    (u_char *));
    906 			if (specific_buf == NULL) {
    907 				RF_Free(k_cfg, sizeof(RF_Config_t));
    908 				return (ENOMEM);
    909 			}
    910 			retcode = copyin(k_cfg->layoutSpecific,
    911 			    (caddr_t) specific_buf,
    912 			    k_cfg->layoutSpecificSize);
    913 			if (retcode) {
    914 				RF_Free(k_cfg, sizeof(RF_Config_t));
    915 				RF_Free(specific_buf,
    916 					k_cfg->layoutSpecificSize);
    917 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    918 					retcode));
    919 				return (retcode);
    920 			}
    921 		} else
    922 			specific_buf = NULL;
    923 		k_cfg->layoutSpecific = specific_buf;
    924 
    925 		/* should do some kind of sanity check on the configuration.
    926 		 * Store the sum of all the bytes in the last byte? */
    927 
    928 		/* configure the system */
    929 
    930 		/*
    931 		 * Clear the entire RAID descriptor, just to make sure
    932 		 *  there is no stale data left in the case of a
    933 		 *  reconfiguration
    934 		 */
    935 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    936 		raidPtr->raidid = unit;
    937 
    938 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    939 
    940 		if (retcode == 0) {
    941 
    942 			/* allow this many simultaneous IO's to
    943 			   this RAID device */
    944 			raidPtr->openings = RAIDOUTSTANDING;
    945 
    946 			raidinit(raidPtr);
    947 			rf_markalldirty(raidPtr);
    948 		}
    949 		/* free the buffers.  No return code here. */
    950 		if (k_cfg->layoutSpecificSize) {
    951 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    952 		}
    953 		RF_Free(k_cfg, sizeof(RF_Config_t));
    954 
    955 		return (retcode);
    956 
    957 		/* shutdown the system */
    958 	case RAIDFRAME_SHUTDOWN:
    959 
    960 		if ((error = raidlock(rs)) != 0)
    961 			return (error);
    962 
    963 		/*
    964 		 * If somebody has a partition mounted, we shouldn't
    965 		 * shutdown.
    966 		 */
    967 
    968 		part = DISKPART(dev);
    969 		pmask = (1 << part);
    970 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    971 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    972 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    973 			raidunlock(rs);
    974 			return (EBUSY);
    975 		}
    976 
    977 		retcode = rf_Shutdown(raidPtr);
    978 
    979 		/* It's no longer initialized... */
    980 		rs->sc_flags &= ~RAIDF_INITED;
    981 
    982 		/* Detach the disk. */
    983 		disk_detach(&rs->sc_dkdev);
    984 
    985 		raidunlock(rs);
    986 
    987 		return (retcode);
    988 	case RAIDFRAME_GET_COMPONENT_LABEL:
    989 		clabel_ptr = (RF_ComponentLabel_t **) data;
    990 		/* need to read the component label for the disk indicated
    991 		   by row,column in clabel */
    992 
    993 		/* For practice, let's get it directly from disk, rather
    994 		   than from the in-core copy */
    995 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    996 			   (RF_ComponentLabel_t *));
    997 		if (clabel == NULL)
    998 			return (ENOMEM);
    999 
   1000 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1001 
   1002 		retcode = copyin( *clabel_ptr, clabel,
   1003 				  sizeof(RF_ComponentLabel_t));
   1004 
   1005 		if (retcode) {
   1006 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1007 			return(retcode);
   1008 		}
   1009 
   1010 		row = clabel->row;
   1011 		column = clabel->column;
   1012 
   1013 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1014 		    (column < 0) || (column >= raidPtr->numCol +
   1015 				     raidPtr->numSpare)) {
   1016 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1017 			return(EINVAL);
   1018 		}
   1019 
   1020 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1021 				raidPtr->raid_cinfo[row][column].ci_vp,
   1022 				clabel );
   1023 
   1024 		retcode = copyout((caddr_t) clabel,
   1025 				  (caddr_t) *clabel_ptr,
   1026 				  sizeof(RF_ComponentLabel_t));
   1027 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1028 		return (retcode);
   1029 
   1030 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1031 		clabel = (RF_ComponentLabel_t *) data;
   1032 
   1033 		/* XXX check the label for valid stuff... */
   1034 		/* Note that some things *should not* get modified --
   1035 		   the user should be re-initing the labels instead of
   1036 		   trying to patch things.
   1037 		   */
   1038 
   1039 		raidid = raidPtr->raidid;
   1040 		printf("raid%d: Got component label:\n", raidid);
   1041 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1042 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1043 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1044 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1045 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1046 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1047 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1048 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1049 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1050 
   1051 		row = clabel->row;
   1052 		column = clabel->column;
   1053 
   1054 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1055 		    (column < 0) || (column >= raidPtr->numCol)) {
   1056 			return(EINVAL);
   1057 		}
   1058 
   1059 		/* XXX this isn't allowed to do anything for now :-) */
   1060 
   1061 		/* XXX and before it is, we need to fill in the rest
   1062 		   of the fields!?!?!?! */
   1063 #if 0
   1064 		raidwrite_component_label(
   1065                             raidPtr->Disks[row][column].dev,
   1066 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1067 			    clabel );
   1068 #endif
   1069 		return (0);
   1070 
   1071 	case RAIDFRAME_INIT_LABELS:
   1072 		clabel = (RF_ComponentLabel_t *) data;
   1073 		/*
   1074 		   we only want the serial number from
   1075 		   the above.  We get all the rest of the information
   1076 		   from the config that was used to create this RAID
   1077 		   set.
   1078 		   */
   1079 
   1080 		raidPtr->serial_number = clabel->serial_number;
   1081 
   1082 		raid_init_component_label(raidPtr, &ci_label);
   1083 		ci_label.serial_number = clabel->serial_number;
   1084 
   1085 		for(row=0;row<raidPtr->numRow;row++) {
   1086 			ci_label.row = row;
   1087 			for(column=0;column<raidPtr->numCol;column++) {
   1088 				diskPtr = &raidPtr->Disks[row][column];
   1089 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1090 					ci_label.partitionSize = diskPtr->partitionSize;
   1091 					ci_label.column = column;
   1092 					raidwrite_component_label(
   1093 					  raidPtr->Disks[row][column].dev,
   1094 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1095 					  &ci_label );
   1096 				}
   1097 			}
   1098 		}
   1099 
   1100 		return (retcode);
   1101 	case RAIDFRAME_SET_AUTOCONFIG:
   1102 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1103 		printf("raid%d: New autoconfig value is: %d\n",
   1104 		       raidPtr->raidid, d);
   1105 		*(int *) data = d;
   1106 		return (retcode);
   1107 
   1108 	case RAIDFRAME_SET_ROOT:
   1109 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1110 		printf("raid%d: New rootpartition value is: %d\n",
   1111 		       raidPtr->raidid, d);
   1112 		*(int *) data = d;
   1113 		return (retcode);
   1114 
   1115 		/* initialize all parity */
   1116 	case RAIDFRAME_REWRITEPARITY:
   1117 
   1118 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1119 			/* Parity for RAID 0 is trivially correct */
   1120 			raidPtr->parity_good = RF_RAID_CLEAN;
   1121 			return(0);
   1122 		}
   1123 
   1124 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1125 			/* Re-write is already in progress! */
   1126 			return(EINVAL);
   1127 		}
   1128 
   1129 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1130 					   rf_RewriteParityThread,
   1131 					   raidPtr,"raid_parity");
   1132 		return (retcode);
   1133 
   1134 
   1135 	case RAIDFRAME_ADD_HOT_SPARE:
   1136 		sparePtr = (RF_SingleComponent_t *) data;
   1137 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1138 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1139 		return(retcode);
   1140 
   1141 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1142 		return(retcode);
   1143 
   1144 	case RAIDFRAME_DELETE_COMPONENT:
   1145 		componentPtr = (RF_SingleComponent_t *)data;
   1146 		memcpy( &component, componentPtr,
   1147 			sizeof(RF_SingleComponent_t));
   1148 		retcode = rf_delete_component(raidPtr, &component);
   1149 		return(retcode);
   1150 
   1151 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1152 		componentPtr = (RF_SingleComponent_t *)data;
   1153 		memcpy( &component, componentPtr,
   1154 			sizeof(RF_SingleComponent_t));
   1155 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1156 		return(retcode);
   1157 
   1158 	case RAIDFRAME_REBUILD_IN_PLACE:
   1159 
   1160 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1161 			/* Can't do this on a RAID 0!! */
   1162 			return(EINVAL);
   1163 		}
   1164 
   1165 		if (raidPtr->recon_in_progress == 1) {
   1166 			/* a reconstruct is already in progress! */
   1167 			return(EINVAL);
   1168 		}
   1169 
   1170 		componentPtr = (RF_SingleComponent_t *) data;
   1171 		memcpy( &component, componentPtr,
   1172 			sizeof(RF_SingleComponent_t));
   1173 		row = component.row;
   1174 		column = component.column;
   1175 
   1176 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1177 		    (column < 0) || (column >= raidPtr->numCol)) {
   1178 			return(EINVAL);
   1179 		}
   1180 
   1181 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1182 		if (rrcopy == NULL)
   1183 			return(ENOMEM);
   1184 
   1185 		rrcopy->raidPtr = (void *) raidPtr;
   1186 		rrcopy->row = row;
   1187 		rrcopy->col = column;
   1188 
   1189 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1190 					   rf_ReconstructInPlaceThread,
   1191 					   rrcopy,"raid_reconip");
   1192 		return(retcode);
   1193 
   1194 	case RAIDFRAME_GET_INFO:
   1195 		if (!raidPtr->valid)
   1196 			return (ENODEV);
   1197 		ucfgp = (RF_DeviceConfig_t **) data;
   1198 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1199 			  (RF_DeviceConfig_t *));
   1200 		if (d_cfg == NULL)
   1201 			return (ENOMEM);
   1202 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1203 		d_cfg->rows = raidPtr->numRow;
   1204 		d_cfg->cols = raidPtr->numCol;
   1205 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1206 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1207 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1208 			return (ENOMEM);
   1209 		}
   1210 		d_cfg->nspares = raidPtr->numSpare;
   1211 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1212 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1213 			return (ENOMEM);
   1214 		}
   1215 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1216 		d = 0;
   1217 		for (i = 0; i < d_cfg->rows; i++) {
   1218 			for (j = 0; j < d_cfg->cols; j++) {
   1219 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1220 				d++;
   1221 			}
   1222 		}
   1223 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1224 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1225 		}
   1226 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1227 				  sizeof(RF_DeviceConfig_t));
   1228 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1229 
   1230 		return (retcode);
   1231 
   1232 	case RAIDFRAME_CHECK_PARITY:
   1233 		*(int *) data = raidPtr->parity_good;
   1234 		return (0);
   1235 
   1236 	case RAIDFRAME_RESET_ACCTOTALS:
   1237 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1238 		return (0);
   1239 
   1240 	case RAIDFRAME_GET_ACCTOTALS:
   1241 		totals = (RF_AccTotals_t *) data;
   1242 		*totals = raidPtr->acc_totals;
   1243 		return (0);
   1244 
   1245 	case RAIDFRAME_KEEP_ACCTOTALS:
   1246 		raidPtr->keep_acc_totals = *(int *)data;
   1247 		return (0);
   1248 
   1249 	case RAIDFRAME_GET_SIZE:
   1250 		*(int *) data = raidPtr->totalSectors;
   1251 		return (0);
   1252 
   1253 		/* fail a disk & optionally start reconstruction */
   1254 	case RAIDFRAME_FAIL_DISK:
   1255 
   1256 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1257 			/* Can't do this on a RAID 0!! */
   1258 			return(EINVAL);
   1259 		}
   1260 
   1261 		rr = (struct rf_recon_req *) data;
   1262 
   1263 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1264 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1265 			return (EINVAL);
   1266 
   1267 		/* make a copy of the recon request so that we don't rely on
   1268 		 * the user's buffer */
   1269 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1270 		if (rrcopy == NULL)
   1271 			return(ENOMEM);
   1272 		memcpy(rrcopy, rr, sizeof(*rr));
   1273 		rrcopy->raidPtr = (void *) raidPtr;
   1274 
   1275 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1276 					   rf_ReconThread,
   1277 					   rrcopy,"raid_recon");
   1278 		return (0);
   1279 
   1280 		/* invoke a copyback operation after recon on whatever disk
   1281 		 * needs it, if any */
   1282 	case RAIDFRAME_COPYBACK:
   1283 
   1284 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1285 			/* This makes no sense on a RAID 0!! */
   1286 			return(EINVAL);
   1287 		}
   1288 
   1289 		if (raidPtr->copyback_in_progress == 1) {
   1290 			/* Copyback is already in progress! */
   1291 			return(EINVAL);
   1292 		}
   1293 
   1294 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1295 					   rf_CopybackThread,
   1296 					   raidPtr,"raid_copyback");
   1297 		return (retcode);
   1298 
   1299 		/* return the percentage completion of reconstruction */
   1300 	case RAIDFRAME_CHECK_RECON_STATUS:
   1301 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1302 			/* This makes no sense on a RAID 0, so tell the
   1303 			   user it's done. */
   1304 			*(int *) data = 100;
   1305 			return(0);
   1306 		}
   1307 		row = 0; /* XXX we only consider a single row... */
   1308 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1309 			*(int *) data = 100;
   1310 		else
   1311 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1312 		return (0);
   1313 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1314 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1315 		row = 0; /* XXX we only consider a single row... */
   1316 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1317 			progressInfo.remaining = 0;
   1318 			progressInfo.completed = 100;
   1319 			progressInfo.total = 100;
   1320 		} else {
   1321 			progressInfo.total =
   1322 				raidPtr->reconControl[row]->numRUsTotal;
   1323 			progressInfo.completed =
   1324 				raidPtr->reconControl[row]->numRUsComplete;
   1325 			progressInfo.remaining = progressInfo.total -
   1326 				progressInfo.completed;
   1327 		}
   1328 		retcode = copyout((caddr_t) &progressInfo,
   1329 				  (caddr_t) *progressInfoPtr,
   1330 				  sizeof(RF_ProgressInfo_t));
   1331 		return (retcode);
   1332 
   1333 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1334 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1335 			/* This makes no sense on a RAID 0, so tell the
   1336 			   user it's done. */
   1337 			*(int *) data = 100;
   1338 			return(0);
   1339 		}
   1340 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1341 			*(int *) data = 100 *
   1342 				raidPtr->parity_rewrite_stripes_done /
   1343 				raidPtr->Layout.numStripe;
   1344 		} else {
   1345 			*(int *) data = 100;
   1346 		}
   1347 		return (0);
   1348 
   1349 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1350 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1351 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1352 			progressInfo.total = raidPtr->Layout.numStripe;
   1353 			progressInfo.completed =
   1354 				raidPtr->parity_rewrite_stripes_done;
   1355 			progressInfo.remaining = progressInfo.total -
   1356 				progressInfo.completed;
   1357 		} else {
   1358 			progressInfo.remaining = 0;
   1359 			progressInfo.completed = 100;
   1360 			progressInfo.total = 100;
   1361 		}
   1362 		retcode = copyout((caddr_t) &progressInfo,
   1363 				  (caddr_t) *progressInfoPtr,
   1364 				  sizeof(RF_ProgressInfo_t));
   1365 		return (retcode);
   1366 
   1367 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1368 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1369 			/* This makes no sense on a RAID 0 */
   1370 			*(int *) data = 100;
   1371 			return(0);
   1372 		}
   1373 		if (raidPtr->copyback_in_progress == 1) {
   1374 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1375 				raidPtr->Layout.numStripe;
   1376 		} else {
   1377 			*(int *) data = 100;
   1378 		}
   1379 		return (0);
   1380 
   1381 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1382 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1383 		if (raidPtr->copyback_in_progress == 1) {
   1384 			progressInfo.total = raidPtr->Layout.numStripe;
   1385 			progressInfo.completed =
   1386 				raidPtr->copyback_stripes_done;
   1387 			progressInfo.remaining = progressInfo.total -
   1388 				progressInfo.completed;
   1389 		} else {
   1390 			progressInfo.remaining = 0;
   1391 			progressInfo.completed = 100;
   1392 			progressInfo.total = 100;
   1393 		}
   1394 		retcode = copyout((caddr_t) &progressInfo,
   1395 				  (caddr_t) *progressInfoPtr,
   1396 				  sizeof(RF_ProgressInfo_t));
   1397 		return (retcode);
   1398 
   1399 		/* the sparetable daemon calls this to wait for the kernel to
   1400 		 * need a spare table. this ioctl does not return until a
   1401 		 * spare table is needed. XXX -- calling mpsleep here in the
   1402 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1403 		 * -- I should either compute the spare table in the kernel,
   1404 		 * or have a different -- XXX XXX -- interface (a different
   1405 		 * character device) for delivering the table     -- XXX */
   1406 #if 0
   1407 	case RAIDFRAME_SPARET_WAIT:
   1408 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1409 		while (!rf_sparet_wait_queue)
   1410 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1411 		waitreq = rf_sparet_wait_queue;
   1412 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1413 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1414 
   1415 		/* structure assignment */
   1416 		*((RF_SparetWait_t *) data) = *waitreq;
   1417 
   1418 		RF_Free(waitreq, sizeof(*waitreq));
   1419 		return (0);
   1420 
   1421 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1422 		 * code in it that will cause the daemon to exit */
   1423 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1424 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1425 		waitreq->fcol = -1;
   1426 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1427 		waitreq->next = rf_sparet_wait_queue;
   1428 		rf_sparet_wait_queue = waitreq;
   1429 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1430 		wakeup(&rf_sparet_wait_queue);
   1431 		return (0);
   1432 
   1433 		/* used by the spare table daemon to deliver a spare table
   1434 		 * into the kernel */
   1435 	case RAIDFRAME_SEND_SPARET:
   1436 
   1437 		/* install the spare table */
   1438 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1439 
   1440 		/* respond to the requestor.  the return status of the spare
   1441 		 * table installation is passed in the "fcol" field */
   1442 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1443 		waitreq->fcol = retcode;
   1444 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1445 		waitreq->next = rf_sparet_resp_queue;
   1446 		rf_sparet_resp_queue = waitreq;
   1447 		wakeup(&rf_sparet_resp_queue);
   1448 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1449 
   1450 		return (retcode);
   1451 #endif
   1452 
   1453 	default:
   1454 		break; /* fall through to the os-specific code below */
   1455 
   1456 	}
   1457 
   1458 	if (!raidPtr->valid)
   1459 		return (EINVAL);
   1460 
   1461 	/*
   1462 	 * Add support for "regular" device ioctls here.
   1463 	 */
   1464 
   1465 	switch (cmd) {
   1466 	case DIOCGDINFO:
   1467 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1468 		break;
   1469 #ifdef __HAVE_OLD_DISKLABEL
   1470 	case ODIOCGDINFO:
   1471 		newlabel = *(rs->sc_dkdev.dk_label);
   1472 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1473 			return ENOTTY;
   1474 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1475 		break;
   1476 #endif
   1477 
   1478 	case DIOCGPART:
   1479 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1480 		((struct partinfo *) data)->part =
   1481 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1482 		break;
   1483 
   1484 	case DIOCWDINFO:
   1485 	case DIOCSDINFO:
   1486 #ifdef __HAVE_OLD_DISKLABEL
   1487 	case ODIOCWDINFO:
   1488 	case ODIOCSDINFO:
   1489 #endif
   1490 	{
   1491 		struct disklabel *lp;
   1492 #ifdef __HAVE_OLD_DISKLABEL
   1493 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1494 			memset(&newlabel, 0, sizeof newlabel);
   1495 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1496 			lp = &newlabel;
   1497 		} else
   1498 #endif
   1499 		lp = (struct disklabel *)data;
   1500 
   1501 		if ((error = raidlock(rs)) != 0)
   1502 			return (error);
   1503 
   1504 		rs->sc_flags |= RAIDF_LABELLING;
   1505 
   1506 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1507 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1508 		if (error == 0) {
   1509 			if (cmd == DIOCWDINFO
   1510 #ifdef __HAVE_OLD_DISKLABEL
   1511 			    || cmd == ODIOCWDINFO
   1512 #endif
   1513 			   )
   1514 				error = writedisklabel(RAIDLABELDEV(dev),
   1515 				    raidstrategy, rs->sc_dkdev.dk_label,
   1516 				    rs->sc_dkdev.dk_cpulabel);
   1517 		}
   1518 		rs->sc_flags &= ~RAIDF_LABELLING;
   1519 
   1520 		raidunlock(rs);
   1521 
   1522 		if (error)
   1523 			return (error);
   1524 		break;
   1525 	}
   1526 
   1527 	case DIOCWLABEL:
   1528 		if (*(int *) data != 0)
   1529 			rs->sc_flags |= RAIDF_WLABEL;
   1530 		else
   1531 			rs->sc_flags &= ~RAIDF_WLABEL;
   1532 		break;
   1533 
   1534 	case DIOCGDEFLABEL:
   1535 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1536 		break;
   1537 
   1538 #ifdef __HAVE_OLD_DISKLABEL
   1539 	case ODIOCGDEFLABEL:
   1540 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1541 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1542 			return ENOTTY;
   1543 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1544 		break;
   1545 #endif
   1546 
   1547 	default:
   1548 		retcode = ENOTTY;
   1549 	}
   1550 	return (retcode);
   1551 
   1552 }
   1553 
   1554 
   1555 /* raidinit -- complete the rest of the initialization for the
   1556    RAIDframe device.  */
   1557 
   1558 
   1559 static void
   1560 raidinit(raidPtr)
   1561 	RF_Raid_t *raidPtr;
   1562 {
   1563 	struct raid_softc *rs;
   1564 	int     unit;
   1565 
   1566 	unit = raidPtr->raidid;
   1567 
   1568 	rs = &raid_softc[unit];
   1569 
   1570 	/* XXX should check return code first... */
   1571 	rs->sc_flags |= RAIDF_INITED;
   1572 
   1573 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1574 
   1575 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1576 
   1577 	/* disk_attach actually creates space for the CPU disklabel, among
   1578 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1579 	 * with disklabels. */
   1580 
   1581 	disk_attach(&rs->sc_dkdev);
   1582 
   1583 	/* XXX There may be a weird interaction here between this, and
   1584 	 * protectedSectors, as used in RAIDframe.  */
   1585 
   1586 	rs->sc_size = raidPtr->totalSectors;
   1587 
   1588 }
   1589 
   1590 /* wake up the daemon & tell it to get us a spare table
   1591  * XXX
   1592  * the entries in the queues should be tagged with the raidPtr
   1593  * so that in the extremely rare case that two recons happen at once,
   1594  * we know for which device were requesting a spare table
   1595  * XXX
   1596  *
   1597  * XXX This code is not currently used. GO
   1598  */
   1599 int
   1600 rf_GetSpareTableFromDaemon(req)
   1601 	RF_SparetWait_t *req;
   1602 {
   1603 	int     retcode;
   1604 
   1605 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1606 	req->next = rf_sparet_wait_queue;
   1607 	rf_sparet_wait_queue = req;
   1608 	wakeup(&rf_sparet_wait_queue);
   1609 
   1610 	/* mpsleep unlocks the mutex */
   1611 	while (!rf_sparet_resp_queue) {
   1612 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1613 		    "raidframe getsparetable", 0);
   1614 	}
   1615 	req = rf_sparet_resp_queue;
   1616 	rf_sparet_resp_queue = req->next;
   1617 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1618 
   1619 	retcode = req->fcol;
   1620 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1621 					 * alloc'd */
   1622 	return (retcode);
   1623 }
   1624 
   1625 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1626  * bp & passes it down.
   1627  * any calls originating in the kernel must use non-blocking I/O
   1628  * do some extra sanity checking to return "appropriate" error values for
   1629  * certain conditions (to make some standard utilities work)
   1630  *
   1631  * Formerly known as: rf_DoAccessKernel
   1632  */
   1633 void
   1634 raidstart(raidPtr)
   1635 	RF_Raid_t *raidPtr;
   1636 {
   1637 	RF_SectorCount_t num_blocks, pb, sum;
   1638 	RF_RaidAddr_t raid_addr;
   1639 	int     retcode;
   1640 	struct partition *pp;
   1641 	daddr_t blocknum;
   1642 	int     unit;
   1643 	struct raid_softc *rs;
   1644 	int     do_async;
   1645 	struct buf *bp;
   1646 
   1647 	unit = raidPtr->raidid;
   1648 	rs = &raid_softc[unit];
   1649 
   1650 	/* quick check to see if anything has died recently */
   1651 	RF_LOCK_MUTEX(raidPtr->mutex);
   1652 	if (raidPtr->numNewFailures > 0) {
   1653 		rf_update_component_labels(raidPtr,
   1654 					   RF_NORMAL_COMPONENT_UPDATE);
   1655 		raidPtr->numNewFailures--;
   1656 	}
   1657 
   1658 	/* Check to see if we're at the limit... */
   1659 	while (raidPtr->openings > 0) {
   1660 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1661 
   1662 		/* get the next item, if any, from the queue */
   1663 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1664 			/* nothing more to do */
   1665 			return;
   1666 		}
   1667 
   1668 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1669 		 * partition.. Need to make it absolute to the underlying
   1670 		 * device.. */
   1671 
   1672 		blocknum = bp->b_blkno;
   1673 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1674 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1675 			blocknum += pp->p_offset;
   1676 		}
   1677 
   1678 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1679 			    (int) blocknum));
   1680 
   1681 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1682 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1683 
   1684 		/* *THIS* is where we adjust what block we're going to...
   1685 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1686 		raid_addr = blocknum;
   1687 
   1688 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1689 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1690 		sum = raid_addr + num_blocks + pb;
   1691 		if (1 || rf_debugKernelAccess) {
   1692 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1693 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1694 				    (int) pb, (int) bp->b_resid));
   1695 		}
   1696 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1697 		    || (sum < num_blocks) || (sum < pb)) {
   1698 			bp->b_error = ENOSPC;
   1699 			bp->b_flags |= B_ERROR;
   1700 			bp->b_resid = bp->b_bcount;
   1701 			biodone(bp);
   1702 			RF_LOCK_MUTEX(raidPtr->mutex);
   1703 			continue;
   1704 		}
   1705 		/*
   1706 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1707 		 */
   1708 
   1709 		if (bp->b_bcount & raidPtr->sectorMask) {
   1710 			bp->b_error = EINVAL;
   1711 			bp->b_flags |= B_ERROR;
   1712 			bp->b_resid = bp->b_bcount;
   1713 			biodone(bp);
   1714 			RF_LOCK_MUTEX(raidPtr->mutex);
   1715 			continue;
   1716 
   1717 		}
   1718 		db1_printf(("Calling DoAccess..\n"));
   1719 
   1720 
   1721 		RF_LOCK_MUTEX(raidPtr->mutex);
   1722 		raidPtr->openings--;
   1723 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1724 
   1725 		/*
   1726 		 * Everything is async.
   1727 		 */
   1728 		do_async = 1;
   1729 
   1730 		disk_busy(&rs->sc_dkdev);
   1731 
   1732 		/* XXX we're still at splbio() here... do we *really*
   1733 		   need to be? */
   1734 
   1735 		/* don't ever condition on bp->b_flags & B_WRITE.
   1736 		 * always condition on B_READ instead */
   1737 
   1738 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1739 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1740 				      do_async, raid_addr, num_blocks,
   1741 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1742 
   1743 		RF_LOCK_MUTEX(raidPtr->mutex);
   1744 	}
   1745 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1746 }
   1747 
   1748 
   1749 
   1750 
   1751 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1752 
   1753 int
   1754 rf_DispatchKernelIO(queue, req)
   1755 	RF_DiskQueue_t *queue;
   1756 	RF_DiskQueueData_t *req;
   1757 {
   1758 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1759 	struct buf *bp;
   1760 	struct raidbuf *raidbp = NULL;
   1761 
   1762 	req->queue = queue;
   1763 
   1764 #if DIAGNOSTIC
   1765 	if (queue->raidPtr->raidid >= numraid) {
   1766 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   1767 		    numraid);
   1768 		panic("Invalid Unit number in rf_DispatchKernelIO");
   1769 	}
   1770 #endif
   1771 
   1772 	bp = req->bp;
   1773 #if 1
   1774 	/* XXX when there is a physical disk failure, someone is passing us a
   1775 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1776 	 * without taking a performance hit... (not sure where the real bug
   1777 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1778 
   1779 	if (bp->b_flags & B_ERROR) {
   1780 		bp->b_flags &= ~B_ERROR;
   1781 	}
   1782 	if (bp->b_error != 0) {
   1783 		bp->b_error = 0;
   1784 	}
   1785 #endif
   1786 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1787 
   1788 	/*
   1789 	 * context for raidiodone
   1790 	 */
   1791 	raidbp->rf_obp = bp;
   1792 	raidbp->req = req;
   1793 
   1794 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1795 
   1796 	switch (req->type) {
   1797 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1798 		/* XXX need to do something extra here.. */
   1799 		/* I'm leaving this in, as I've never actually seen it used,
   1800 		 * and I'd like folks to report it... GO */
   1801 		printf(("WAKEUP CALLED\n"));
   1802 		queue->numOutstanding++;
   1803 
   1804 		/* XXX need to glue the original buffer into this??  */
   1805 
   1806 		KernelWakeupFunc(&raidbp->rf_buf);
   1807 		break;
   1808 
   1809 	case RF_IO_TYPE_READ:
   1810 	case RF_IO_TYPE_WRITE:
   1811 
   1812 		if (req->tracerec) {
   1813 			RF_ETIMER_START(req->tracerec->timer);
   1814 		}
   1815 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1816 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1817 		    req->sectorOffset, req->numSector,
   1818 		    req->buf, KernelWakeupFunc, (void *) req,
   1819 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1820 
   1821 		if (rf_debugKernelAccess) {
   1822 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1823 				(long) bp->b_blkno));
   1824 		}
   1825 		queue->numOutstanding++;
   1826 		queue->last_deq_sector = req->sectorOffset;
   1827 		/* acc wouldn't have been let in if there were any pending
   1828 		 * reqs at any other priority */
   1829 		queue->curPriority = req->priority;
   1830 
   1831 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1832 			    req->type, queue->raidPtr->raidid,
   1833 			    queue->row, queue->col));
   1834 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1835 			(int) req->sectorOffset, (int) req->numSector,
   1836 			(int) (req->numSector <<
   1837 			    queue->raidPtr->logBytesPerSector),
   1838 			(int) queue->raidPtr->logBytesPerSector));
   1839 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1840 			raidbp->rf_buf.b_vp->v_numoutput++;
   1841 		}
   1842 		VOP_STRATEGY(&raidbp->rf_buf);
   1843 
   1844 		break;
   1845 
   1846 	default:
   1847 		panic("bad req->type in rf_DispatchKernelIO");
   1848 	}
   1849 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1850 
   1851 	return (0);
   1852 }
   1853 /* this is the callback function associated with a I/O invoked from
   1854    kernel code.
   1855  */
   1856 static void
   1857 KernelWakeupFunc(vbp)
   1858 	struct buf *vbp;
   1859 {
   1860 	RF_DiskQueueData_t *req = NULL;
   1861 	RF_DiskQueue_t *queue;
   1862 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1863 	struct buf *bp;
   1864 	int s;
   1865 
   1866 	s = splbio();
   1867 	db1_printf(("recovering the request queue:\n"));
   1868 	req = raidbp->req;
   1869 
   1870 	bp = raidbp->rf_obp;
   1871 
   1872 	queue = (RF_DiskQueue_t *) req->queue;
   1873 
   1874 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1875 		bp->b_flags |= B_ERROR;
   1876 		bp->b_error = raidbp->rf_buf.b_error ?
   1877 		    raidbp->rf_buf.b_error : EIO;
   1878 	}
   1879 
   1880 	/* XXX methinks this could be wrong... */
   1881 #if 1
   1882 	bp->b_resid = raidbp->rf_buf.b_resid;
   1883 #endif
   1884 
   1885 	if (req->tracerec) {
   1886 		RF_ETIMER_STOP(req->tracerec->timer);
   1887 		RF_ETIMER_EVAL(req->tracerec->timer);
   1888 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1889 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1890 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1891 		req->tracerec->num_phys_ios++;
   1892 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1893 	}
   1894 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1895 
   1896 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1897 	 * ballistic, and mark the component as hosed... */
   1898 
   1899 	if (bp->b_flags & B_ERROR) {
   1900 		/* Mark the disk as dead */
   1901 		/* but only mark it once... */
   1902 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1903 		    rf_ds_optimal) {
   1904 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1905 			       queue->raidPtr->raidid,
   1906 			       queue->raidPtr->Disks[queue->row][queue->col].devname);
   1907 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1908 			    rf_ds_failed;
   1909 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1910 			queue->raidPtr->numFailures++;
   1911 			queue->raidPtr->numNewFailures++;
   1912 		} else {	/* Disk is already dead... */
   1913 			/* printf("Disk already marked as dead!\n"); */
   1914 		}
   1915 
   1916 	}
   1917 
   1918 	pool_put(&raidframe_cbufpool, raidbp);
   1919 
   1920 	/* Fill in the error value */
   1921 
   1922 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   1923 
   1924 	simple_lock(&queue->raidPtr->iodone_lock);
   1925 
   1926 	/* Drop this one on the "finished" queue... */
   1927 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   1928 
   1929 	/* Let the raidio thread know there is work to be done. */
   1930 	wakeup(&(queue->raidPtr->iodone));
   1931 
   1932 	simple_unlock(&queue->raidPtr->iodone_lock);
   1933 
   1934 	splx(s);
   1935 }
   1936 
   1937 
   1938 
   1939 /*
   1940  * initialize a buf structure for doing an I/O in the kernel.
   1941  */
   1942 static void
   1943 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1944        logBytesPerSector, b_proc)
   1945 	struct buf *bp;
   1946 	struct vnode *b_vp;
   1947 	unsigned rw_flag;
   1948 	dev_t dev;
   1949 	RF_SectorNum_t startSect;
   1950 	RF_SectorCount_t numSect;
   1951 	caddr_t buf;
   1952 	void (*cbFunc) (struct buf *);
   1953 	void *cbArg;
   1954 	int logBytesPerSector;
   1955 	struct proc *b_proc;
   1956 {
   1957 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1958 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1959 	bp->b_bcount = numSect << logBytesPerSector;
   1960 	bp->b_bufsize = bp->b_bcount;
   1961 	bp->b_error = 0;
   1962 	bp->b_dev = dev;
   1963 	bp->b_data = buf;
   1964 	bp->b_blkno = startSect;
   1965 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1966 	if (bp->b_bcount == 0) {
   1967 		panic("bp->b_bcount is zero in InitBP!!");
   1968 	}
   1969 	bp->b_proc = b_proc;
   1970 	bp->b_iodone = cbFunc;
   1971 	bp->b_vp = b_vp;
   1972 
   1973 }
   1974 
   1975 static void
   1976 raidgetdefaultlabel(raidPtr, rs, lp)
   1977 	RF_Raid_t *raidPtr;
   1978 	struct raid_softc *rs;
   1979 	struct disklabel *lp;
   1980 {
   1981 	memset(lp, 0, sizeof(*lp));
   1982 
   1983 	/* fabricate a label... */
   1984 	lp->d_secperunit = raidPtr->totalSectors;
   1985 	lp->d_secsize = raidPtr->bytesPerSector;
   1986 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1987 	lp->d_ntracks = 4 * raidPtr->numCol;
   1988 	lp->d_ncylinders = raidPtr->totalSectors /
   1989 		(lp->d_nsectors * lp->d_ntracks);
   1990 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1991 
   1992 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1993 	lp->d_type = DTYPE_RAID;
   1994 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1995 	lp->d_rpm = 3600;
   1996 	lp->d_interleave = 1;
   1997 	lp->d_flags = 0;
   1998 
   1999 	lp->d_partitions[RAW_PART].p_offset = 0;
   2000 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2001 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2002 	lp->d_npartitions = RAW_PART + 1;
   2003 
   2004 	lp->d_magic = DISKMAGIC;
   2005 	lp->d_magic2 = DISKMAGIC;
   2006 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2007 
   2008 }
   2009 /*
   2010  * Read the disklabel from the raid device.  If one is not present, fake one
   2011  * up.
   2012  */
   2013 static void
   2014 raidgetdisklabel(dev)
   2015 	dev_t   dev;
   2016 {
   2017 	int     unit = raidunit(dev);
   2018 	struct raid_softc *rs = &raid_softc[unit];
   2019 	char   *errstring;
   2020 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2021 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2022 	RF_Raid_t *raidPtr;
   2023 
   2024 	db1_printf(("Getting the disklabel...\n"));
   2025 
   2026 	memset(clp, 0, sizeof(*clp));
   2027 
   2028 	raidPtr = raidPtrs[unit];
   2029 
   2030 	raidgetdefaultlabel(raidPtr, rs, lp);
   2031 
   2032 	/*
   2033 	 * Call the generic disklabel extraction routine.
   2034 	 */
   2035 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2036 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2037 	if (errstring)
   2038 		raidmakedisklabel(rs);
   2039 	else {
   2040 		int     i;
   2041 		struct partition *pp;
   2042 
   2043 		/*
   2044 		 * Sanity check whether the found disklabel is valid.
   2045 		 *
   2046 		 * This is necessary since total size of the raid device
   2047 		 * may vary when an interleave is changed even though exactly
   2048 		 * same componets are used, and old disklabel may used
   2049 		 * if that is found.
   2050 		 */
   2051 		if (lp->d_secperunit != rs->sc_size)
   2052 			printf("raid%d: WARNING: %s: "
   2053 			    "total sector size in disklabel (%d) != "
   2054 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2055 			    lp->d_secperunit, (long) rs->sc_size);
   2056 		for (i = 0; i < lp->d_npartitions; i++) {
   2057 			pp = &lp->d_partitions[i];
   2058 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2059 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2060 				       "exceeds the size of raid (%ld)\n",
   2061 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2062 		}
   2063 	}
   2064 
   2065 }
   2066 /*
   2067  * Take care of things one might want to take care of in the event
   2068  * that a disklabel isn't present.
   2069  */
   2070 static void
   2071 raidmakedisklabel(rs)
   2072 	struct raid_softc *rs;
   2073 {
   2074 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2075 	db1_printf(("Making a label..\n"));
   2076 
   2077 	/*
   2078 	 * For historical reasons, if there's no disklabel present
   2079 	 * the raw partition must be marked FS_BSDFFS.
   2080 	 */
   2081 
   2082 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2083 
   2084 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2085 
   2086 	lp->d_checksum = dkcksum(lp);
   2087 }
   2088 /*
   2089  * Lookup the provided name in the filesystem.  If the file exists,
   2090  * is a valid block device, and isn't being used by anyone else,
   2091  * set *vpp to the file's vnode.
   2092  * You'll find the original of this in ccd.c
   2093  */
   2094 int
   2095 raidlookup(path, p, vpp)
   2096 	char   *path;
   2097 	struct proc *p;
   2098 	struct vnode **vpp;	/* result */
   2099 {
   2100 	struct nameidata nd;
   2101 	struct vnode *vp;
   2102 	struct vattr va;
   2103 	int     error;
   2104 
   2105 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2106 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2107 		return (error);
   2108 	}
   2109 	vp = nd.ni_vp;
   2110 	if (vp->v_usecount > 1) {
   2111 		VOP_UNLOCK(vp, 0);
   2112 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2113 		return (EBUSY);
   2114 	}
   2115 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2116 		VOP_UNLOCK(vp, 0);
   2117 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2118 		return (error);
   2119 	}
   2120 	/* XXX: eventually we should handle VREG, too. */
   2121 	if (va.va_type != VBLK) {
   2122 		VOP_UNLOCK(vp, 0);
   2123 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2124 		return (ENOTBLK);
   2125 	}
   2126 	VOP_UNLOCK(vp, 0);
   2127 	*vpp = vp;
   2128 	return (0);
   2129 }
   2130 /*
   2131  * Wait interruptibly for an exclusive lock.
   2132  *
   2133  * XXX
   2134  * Several drivers do this; it should be abstracted and made MP-safe.
   2135  * (Hmm... where have we seen this warning before :->  GO )
   2136  */
   2137 static int
   2138 raidlock(rs)
   2139 	struct raid_softc *rs;
   2140 {
   2141 	int     error;
   2142 
   2143 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2144 		rs->sc_flags |= RAIDF_WANTED;
   2145 		if ((error =
   2146 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2147 			return (error);
   2148 	}
   2149 	rs->sc_flags |= RAIDF_LOCKED;
   2150 	return (0);
   2151 }
   2152 /*
   2153  * Unlock and wake up any waiters.
   2154  */
   2155 static void
   2156 raidunlock(rs)
   2157 	struct raid_softc *rs;
   2158 {
   2159 
   2160 	rs->sc_flags &= ~RAIDF_LOCKED;
   2161 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2162 		rs->sc_flags &= ~RAIDF_WANTED;
   2163 		wakeup(rs);
   2164 	}
   2165 }
   2166 
   2167 
/*
 * Location and size of the component label area on each component.
 * The read/write routines below convert the offset to a block number
 * by dividing by DEV_BSIZE and transfer RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2170 
   2171 int
   2172 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2173 {
   2174 	RF_ComponentLabel_t clabel;
   2175 	raidread_component_label(dev, b_vp, &clabel);
   2176 	clabel.mod_counter = mod_counter;
   2177 	clabel.clean = RF_RAID_CLEAN;
   2178 	raidwrite_component_label(dev, b_vp, &clabel);
   2179 	return(0);
   2180 }
   2181 
   2182 
   2183 int
   2184 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2185 {
   2186 	RF_ComponentLabel_t clabel;
   2187 	raidread_component_label(dev, b_vp, &clabel);
   2188 	clabel.mod_counter = mod_counter;
   2189 	clabel.clean = RF_RAID_DIRTY;
   2190 	raidwrite_component_label(dev, b_vp, &clabel);
   2191 	return(0);
   2192 }
   2193 
   2194 /* ARGSUSED */
   2195 int
   2196 raidread_component_label(dev, b_vp, clabel)
   2197 	dev_t dev;
   2198 	struct vnode *b_vp;
   2199 	RF_ComponentLabel_t *clabel;
   2200 {
   2201 	struct buf *bp;
   2202 	const struct bdevsw *bdev;
   2203 	int error;
   2204 
   2205 	/* XXX should probably ensure that we don't try to do this if
   2206 	   someone has changed rf_protected_sectors. */
   2207 
   2208 	if (b_vp == NULL) {
   2209 		/* For whatever reason, this component is not valid.
   2210 		   Don't try to read a component label from it. */
   2211 		return(EINVAL);
   2212 	}
   2213 
   2214 	/* get a block of the appropriate size... */
   2215 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2216 	bp->b_dev = dev;
   2217 
   2218 	/* get our ducks in a row for the read */
   2219 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2220 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2221 	bp->b_flags |= B_READ;
   2222  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2223 
   2224 	bdev = bdevsw_lookup(bp->b_dev);
   2225 	if (bdev == NULL)
   2226 		return (ENXIO);
   2227 	(*bdev->d_strategy)(bp);
   2228 
   2229 	error = biowait(bp);
   2230 
   2231 	if (!error) {
   2232 		memcpy(clabel, bp->b_data,
   2233 		       sizeof(RF_ComponentLabel_t));
   2234         }
   2235 
   2236 	brelse(bp);
   2237 	return(error);
   2238 }
   2239 /* ARGSUSED */
   2240 int
   2241 raidwrite_component_label(dev, b_vp, clabel)
   2242 	dev_t dev;
   2243 	struct vnode *b_vp;
   2244 	RF_ComponentLabel_t *clabel;
   2245 {
   2246 	struct buf *bp;
   2247 	const struct bdevsw *bdev;
   2248 	int error;
   2249 
   2250 	/* get a block of the appropriate size... */
   2251 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2252 	bp->b_dev = dev;
   2253 
   2254 	/* get our ducks in a row for the write */
   2255 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2256 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2257 	bp->b_flags |= B_WRITE;
   2258  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2259 
   2260 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2261 
   2262 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2263 
   2264 	bdev = bdevsw_lookup(bp->b_dev);
   2265 	if (bdev == NULL)
   2266 		return (ENXIO);
   2267 	(*bdev->d_strategy)(bp);
   2268 	error = biowait(bp);
   2269 	brelse(bp);
   2270 	if (error) {
   2271 #if 1
   2272 		printf("Failed to write RAID component info!\n");
   2273 #endif
   2274 	}
   2275 
   2276 	return(error);
   2277 }
   2278 
   2279 void
   2280 rf_markalldirty(raidPtr)
   2281 	RF_Raid_t *raidPtr;
   2282 {
   2283 	RF_ComponentLabel_t clabel;
   2284 	int sparecol;
   2285 	int r,c;
   2286 	int i,j;
   2287 	int srow, scol;
   2288 
   2289 	raidPtr->mod_counter++;
   2290 	for (r = 0; r < raidPtr->numRow; r++) {
   2291 		for (c = 0; c < raidPtr->numCol; c++) {
   2292 			/* we don't want to touch (at all) a disk that has
   2293 			   failed */
   2294 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2295 				raidread_component_label(
   2296 					raidPtr->Disks[r][c].dev,
   2297 					raidPtr->raid_cinfo[r][c].ci_vp,
   2298 					&clabel);
   2299 				if (clabel.status == rf_ds_spared) {
   2300 					/* XXX do something special...
   2301 					 but whatever you do, don't
   2302 					 try to access it!! */
   2303 				} else {
   2304 					raidmarkdirty(
   2305 					      raidPtr->Disks[r][c].dev,
   2306 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2307 					      raidPtr->mod_counter);
   2308 				}
   2309 			}
   2310 		}
   2311 	}
   2312 
   2313 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2314 		sparecol = raidPtr->numCol + c;
   2315 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2316 			/*
   2317 
   2318 			   we claim this disk is "optimal" if it's
   2319 			   rf_ds_used_spare, as that means it should be
   2320 			   directly substitutable for the disk it replaced.
   2321 			   We note that too...
   2322 
   2323 			 */
   2324 
   2325 			for(i=0;i<raidPtr->numRow;i++) {
   2326 				for(j=0;j<raidPtr->numCol;j++) {
   2327 					if ((raidPtr->Disks[i][j].spareRow ==
   2328 					     0) &&
   2329 					    (raidPtr->Disks[i][j].spareCol ==
   2330 					     sparecol)) {
   2331 						srow = i;
   2332 						scol = j;
   2333 						break;
   2334 					}
   2335 				}
   2336 			}
   2337 
   2338 			raidread_component_label(
   2339 				 raidPtr->Disks[0][sparecol].dev,
   2340 				 raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2341 				 &clabel);
   2342 			/* make sure status is noted */
   2343 
   2344 			raid_init_component_label(raidPtr, &clabel);
   2345 
   2346 			clabel.row = srow;
   2347 			clabel.column = scol;
   2348 			/* Note: we *don't* change status from rf_ds_used_spare
   2349 			   to rf_ds_optimal */
   2350 			/* clabel.status = rf_ds_optimal; */
   2351 
   2352 			raidmarkdirty(raidPtr->Disks[0][sparecol].dev,
   2353 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2354 				      raidPtr->mod_counter);
   2355 		}
   2356 	}
   2357 }
   2358 
   2359 
   2360 void
   2361 rf_update_component_labels(raidPtr, final)
   2362 	RF_Raid_t *raidPtr;
   2363 	int final;
   2364 {
   2365 	RF_ComponentLabel_t clabel;
   2366 	int sparecol;
   2367 	int r,c;
   2368 	int i,j;
   2369 	int srow, scol;
   2370 
   2371 	srow = -1;
   2372 	scol = -1;
   2373 
   2374 	/* XXX should do extra checks to make sure things really are clean,
   2375 	   rather than blindly setting the clean bit... */
   2376 
   2377 	raidPtr->mod_counter++;
   2378 
   2379 	for (r = 0; r < raidPtr->numRow; r++) {
   2380 		for (c = 0; c < raidPtr->numCol; c++) {
   2381 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2382 				raidread_component_label(
   2383 					raidPtr->Disks[r][c].dev,
   2384 					raidPtr->raid_cinfo[r][c].ci_vp,
   2385 					&clabel);
   2386 				/* make sure status is noted */
   2387 				clabel.status = rf_ds_optimal;
   2388 				/* bump the counter */
   2389 				clabel.mod_counter = raidPtr->mod_counter;
   2390 
   2391 				raidwrite_component_label(
   2392 					raidPtr->Disks[r][c].dev,
   2393 					raidPtr->raid_cinfo[r][c].ci_vp,
   2394 					&clabel);
   2395 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2396 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2397 						raidmarkclean(
   2398 							      raidPtr->Disks[r][c].dev,
   2399 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2400 							      raidPtr->mod_counter);
   2401 					}
   2402 				}
   2403 			}
   2404 			/* else we don't touch it.. */
   2405 		}
   2406 	}
   2407 
   2408 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2409 		sparecol = raidPtr->numCol + c;
   2410 		/* Need to ensure that the reconstruct actually completed! */
   2411 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2412 			/*
   2413 
   2414 			   we claim this disk is "optimal" if it's
   2415 			   rf_ds_used_spare, as that means it should be
   2416 			   directly substitutable for the disk it replaced.
   2417 			   We note that too...
   2418 
   2419 			 */
   2420 
   2421 			for(i=0;i<raidPtr->numRow;i++) {
   2422 				for(j=0;j<raidPtr->numCol;j++) {
   2423 					if ((raidPtr->Disks[i][j].spareRow ==
   2424 					     0) &&
   2425 					    (raidPtr->Disks[i][j].spareCol ==
   2426 					     sparecol)) {
   2427 						srow = i;
   2428 						scol = j;
   2429 						break;
   2430 					}
   2431 				}
   2432 			}
   2433 
   2434 			/* XXX shouldn't *really* need this... */
   2435 			raidread_component_label(
   2436 				      raidPtr->Disks[0][sparecol].dev,
   2437 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2438 				      &clabel);
   2439 			/* make sure status is noted */
   2440 
   2441 			raid_init_component_label(raidPtr, &clabel);
   2442 
   2443 			clabel.mod_counter = raidPtr->mod_counter;
   2444 			clabel.row = srow;
   2445 			clabel.column = scol;
   2446 			clabel.status = rf_ds_optimal;
   2447 
   2448 			raidwrite_component_label(
   2449 				      raidPtr->Disks[0][sparecol].dev,
   2450 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2451 				      &clabel);
   2452 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2453 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2454 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2455 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2456 						       raidPtr->mod_counter);
   2457 				}
   2458 			}
   2459 		}
   2460 	}
   2461 }
   2462 
   2463 void
   2464 rf_close_component(raidPtr, vp, auto_configured)
   2465 	RF_Raid_t *raidPtr;
   2466 	struct vnode *vp;
   2467 	int auto_configured;
   2468 {
   2469 	struct proc *p;
   2470 
   2471 	p = raidPtr->engine_thread;
   2472 
   2473 	if (vp != NULL) {
   2474 		if (auto_configured == 1) {
   2475 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2476 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2477 			vput(vp);
   2478 
   2479 		} else {
   2480 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2481 		}
   2482 	}
   2483 }
   2484 
   2485 
   2486 void
   2487 rf_UnconfigureVnodes(raidPtr)
   2488 	RF_Raid_t *raidPtr;
   2489 {
   2490 	int r,c;
   2491 	struct vnode *vp;
   2492 	int acd;
   2493 
   2494 
   2495 	/* We take this opportunity to close the vnodes like we should.. */
   2496 
   2497 	for (r = 0; r < raidPtr->numRow; r++) {
   2498 		for (c = 0; c < raidPtr->numCol; c++) {
   2499 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2500 			acd = raidPtr->Disks[r][c].auto_configured;
   2501 			rf_close_component(raidPtr, vp, acd);
   2502 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2503 			raidPtr->Disks[r][c].auto_configured = 0;
   2504 		}
   2505 	}
   2506 	for (r = 0; r < raidPtr->numSpare; r++) {
   2507 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2508 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2509 		rf_close_component(raidPtr, vp, acd);
   2510 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2511 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2512 	}
   2513 }
   2514 
   2515 
   2516 void
   2517 rf_ReconThread(req)
   2518 	struct rf_recon_req *req;
   2519 {
   2520 	int     s;
   2521 	RF_Raid_t *raidPtr;
   2522 
   2523 	s = splbio();
   2524 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2525 	raidPtr->recon_in_progress = 1;
   2526 
   2527 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2528 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2529 
   2530 	/* XXX get rid of this! we don't need it at all.. */
   2531 	RF_Free(req, sizeof(*req));
   2532 
   2533 	raidPtr->recon_in_progress = 0;
   2534 	splx(s);
   2535 
   2536 	/* That's all... */
   2537 	kthread_exit(0);        /* does not return */
   2538 }
   2539 
   2540 void
   2541 rf_RewriteParityThread(raidPtr)
   2542 	RF_Raid_t *raidPtr;
   2543 {
   2544 	int retcode;
   2545 	int s;
   2546 
   2547 	raidPtr->parity_rewrite_in_progress = 1;
   2548 	s = splbio();
   2549 	retcode = rf_RewriteParity(raidPtr);
   2550 	splx(s);
   2551 	if (retcode) {
   2552 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2553 	} else {
   2554 		/* set the clean bit!  If we shutdown correctly,
   2555 		   the clean bit on each component label will get
   2556 		   set */
   2557 		raidPtr->parity_good = RF_RAID_CLEAN;
   2558 	}
   2559 	raidPtr->parity_rewrite_in_progress = 0;
   2560 
   2561 	/* Anyone waiting for us to stop?  If so, inform them... */
   2562 	if (raidPtr->waitShutdown) {
   2563 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2564 	}
   2565 
   2566 	/* That's all... */
   2567 	kthread_exit(0);        /* does not return */
   2568 }
   2569 
   2570 
   2571 void
   2572 rf_CopybackThread(raidPtr)
   2573 	RF_Raid_t *raidPtr;
   2574 {
   2575 	int s;
   2576 
   2577 	raidPtr->copyback_in_progress = 1;
   2578 	s = splbio();
   2579 	rf_CopybackReconstructedData(raidPtr);
   2580 	splx(s);
   2581 	raidPtr->copyback_in_progress = 0;
   2582 
   2583 	/* That's all... */
   2584 	kthread_exit(0);        /* does not return */
   2585 }
   2586 
   2587 
   2588 void
   2589 rf_ReconstructInPlaceThread(req)
   2590 	struct rf_recon_req *req;
   2591 {
   2592 	int retcode;
   2593 	int s;
   2594 	RF_Raid_t *raidPtr;
   2595 
   2596 	s = splbio();
   2597 	raidPtr = req->raidPtr;
   2598 	raidPtr->recon_in_progress = 1;
   2599 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2600 	RF_Free(req, sizeof(*req));
   2601 	raidPtr->recon_in_progress = 0;
   2602 	splx(s);
   2603 
   2604 	/* That's all... */
   2605 	kthread_exit(0);        /* does not return */
   2606 }
   2607 
   2608 RF_AutoConfig_t *
   2609 rf_find_raid_components()
   2610 {
   2611 	struct vnode *vp;
   2612 	struct disklabel label;
   2613 	struct device *dv;
   2614 	dev_t dev;
   2615 	int bmajor;
   2616 	int error;
   2617 	int i;
   2618 	int good_one;
   2619 	RF_ComponentLabel_t *clabel;
   2620 	RF_AutoConfig_t *ac_list;
   2621 	RF_AutoConfig_t *ac;
   2622 
   2623 
   2624 	/* initialize the AutoConfig list */
   2625 	ac_list = NULL;
   2626 
   2627 	/* we begin by trolling through *all* the devices on the system */
   2628 
   2629 	for (dv = alldevs.tqh_first; dv != NULL;
   2630 	     dv = dv->dv_list.tqe_next) {
   2631 
   2632 		/* we are only interested in disks... */
   2633 		if (dv->dv_class != DV_DISK)
   2634 			continue;
   2635 
   2636 		/* we don't care about floppies... */
   2637 		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
   2638 			continue;
   2639 		}
   2640 
   2641 		/* we don't care about CD's... */
   2642 		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
   2643 			continue;
   2644 		}
   2645 
   2646 		/* hdfd is the Atari/Hades floppy driver */
   2647 		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
   2648 			continue;
   2649 		}
   2650 		/* fdisa is the Atari/Milan floppy driver */
   2651 		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
   2652 			continue;
   2653 		}
   2654 
   2655 		/* need to find the device_name_to_block_device_major stuff */
   2656 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2657 
   2658 		/* get a vnode for the raw partition of this disk */
   2659 
   2660 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2661 		if (bdevvp(dev, &vp))
   2662 			panic("RAID can't alloc vnode");
   2663 
   2664 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2665 
   2666 		if (error) {
   2667 			/* "Who cares."  Continue looking
   2668 			   for something that exists*/
   2669 			vput(vp);
   2670 			continue;
   2671 		}
   2672 
   2673 		/* Ok, the disk exists.  Go get the disklabel. */
   2674 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2675 				  FREAD, NOCRED, 0);
   2676 		if (error) {
   2677 			/*
   2678 			 * XXX can't happen - open() would
   2679 			 * have errored out (or faked up one)
   2680 			 */
   2681 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2682 			       dv->dv_xname, 'a' + RAW_PART, error);
   2683 		}
   2684 
   2685 		/* don't need this any more.  We'll allocate it again
   2686 		   a little later if we really do... */
   2687 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2688 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2689 		vput(vp);
   2690 
   2691 		for (i=0; i < label.d_npartitions; i++) {
   2692 			/* We only support partitions marked as RAID */
   2693 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2694 				continue;
   2695 
   2696 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2697 			if (bdevvp(dev, &vp))
   2698 				panic("RAID can't alloc vnode");
   2699 
   2700 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2701 			if (error) {
   2702 				/* Whatever... */
   2703 				vput(vp);
   2704 				continue;
   2705 			}
   2706 
   2707 			good_one = 0;
   2708 
   2709 			clabel = (RF_ComponentLabel_t *)
   2710 				malloc(sizeof(RF_ComponentLabel_t),
   2711 				       M_RAIDFRAME, M_NOWAIT);
   2712 			if (clabel == NULL) {
   2713 				/* XXX CLEANUP HERE */
   2714 				printf("RAID auto config: out of memory!\n");
   2715 				return(NULL); /* XXX probably should panic? */
   2716 			}
   2717 
   2718 			if (!raidread_component_label(dev, vp, clabel)) {
   2719 				/* Got the label.  Does it look reasonable? */
   2720 				if (rf_reasonable_label(clabel) &&
   2721 				    (clabel->partitionSize <=
   2722 				     label.d_partitions[i].p_size)) {
   2723 #if DEBUG
   2724 					printf("Component on: %s%c: %d\n",
   2725 					       dv->dv_xname, 'a'+i,
   2726 					       label.d_partitions[i].p_size);
   2727 					rf_print_component_label(clabel);
   2728 #endif
   2729 					/* if it's reasonable, add it,
   2730 					   else ignore it. */
   2731 					ac = (RF_AutoConfig_t *)
   2732 						malloc(sizeof(RF_AutoConfig_t),
   2733 						       M_RAIDFRAME,
   2734 						       M_NOWAIT);
   2735 					if (ac == NULL) {
   2736 						/* XXX should panic?? */
   2737 						return(NULL);
   2738 					}
   2739 
   2740 					sprintf(ac->devname, "%s%c",
   2741 						dv->dv_xname, 'a'+i);
   2742 					ac->dev = dev;
   2743 					ac->vp = vp;
   2744 					ac->clabel = clabel;
   2745 					ac->next = ac_list;
   2746 					ac_list = ac;
   2747 					good_one = 1;
   2748 				}
   2749 			}
   2750 			if (!good_one) {
   2751 				/* cleanup */
   2752 				free(clabel, M_RAIDFRAME);
   2753 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2754 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2755 				vput(vp);
   2756 			}
   2757 		}
   2758 	}
   2759 	return(ac_list);
   2760 }
   2761 
   2762 static int
   2763 rf_reasonable_label(clabel)
   2764 	RF_ComponentLabel_t *clabel;
   2765 {
   2766 
   2767 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2768 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2769 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2770 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2771 	    clabel->row >=0 &&
   2772 	    clabel->column >= 0 &&
   2773 	    clabel->num_rows > 0 &&
   2774 	    clabel->num_columns > 0 &&
   2775 	    clabel->row < clabel->num_rows &&
   2776 	    clabel->column < clabel->num_columns &&
   2777 	    clabel->blockSize > 0 &&
   2778 	    clabel->numBlocks > 0) {
   2779 		/* label looks reasonable enough... */
   2780 		return(1);
   2781 	}
   2782 	return(0);
   2783 }
   2784 
   2785 
#if DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Only built when DEBUG is set.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	const char *clean_str = clabel->clean ? "Yes" : "No";
	const char *autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	const char *root_str = clabel->root_partition ? "Yes" : "No";

	/* position and geometry of the set */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);

	/* identification */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status );

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);

	/* autoconfiguration state */
	printf("   Autoconfig: %s\n", autoconf_str );
	printf("   Contains root partition: %s\n", root_str );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   2814 
   2815 RF_ConfigSet_t *
   2816 rf_create_auto_sets(ac_list)
   2817 	RF_AutoConfig_t *ac_list;
   2818 {
   2819 	RF_AutoConfig_t *ac;
   2820 	RF_ConfigSet_t *config_sets;
   2821 	RF_ConfigSet_t *cset;
   2822 	RF_AutoConfig_t *ac_next;
   2823 
   2824 
   2825 	config_sets = NULL;
   2826 
   2827 	/* Go through the AutoConfig list, and figure out which components
   2828 	   belong to what sets.  */
   2829 	ac = ac_list;
   2830 	while(ac!=NULL) {
   2831 		/* we're going to putz with ac->next, so save it here
   2832 		   for use at the end of the loop */
   2833 		ac_next = ac->next;
   2834 
   2835 		if (config_sets == NULL) {
   2836 			/* will need at least this one... */
   2837 			config_sets = (RF_ConfigSet_t *)
   2838 				malloc(sizeof(RF_ConfigSet_t),
   2839 				       M_RAIDFRAME, M_NOWAIT);
   2840 			if (config_sets == NULL) {
   2841 				panic("rf_create_auto_sets: No memory!");
   2842 			}
   2843 			/* this one is easy :) */
   2844 			config_sets->ac = ac;
   2845 			config_sets->next = NULL;
   2846 			config_sets->rootable = 0;
   2847 			ac->next = NULL;
   2848 		} else {
   2849 			/* which set does this component fit into? */
   2850 			cset = config_sets;
   2851 			while(cset!=NULL) {
   2852 				if (rf_does_it_fit(cset, ac)) {
   2853 					/* looks like it matches... */
   2854 					ac->next = cset->ac;
   2855 					cset->ac = ac;
   2856 					break;
   2857 				}
   2858 				cset = cset->next;
   2859 			}
   2860 			if (cset==NULL) {
   2861 				/* didn't find a match above... new set..*/
   2862 				cset = (RF_ConfigSet_t *)
   2863 					malloc(sizeof(RF_ConfigSet_t),
   2864 					       M_RAIDFRAME, M_NOWAIT);
   2865 				if (cset == NULL) {
   2866 					panic("rf_create_auto_sets: No memory!");
   2867 				}
   2868 				cset->ac = ac;
   2869 				ac->next = NULL;
   2870 				cset->next = config_sets;
   2871 				cset->rootable = 0;
   2872 				config_sets = cset;
   2873 			}
   2874 		}
   2875 		ac = ac_next;
   2876 	}
   2877 
   2878 
   2879 	return(config_sets);
   2880 }
   2881 
   2882 static int
   2883 rf_does_it_fit(cset, ac)
   2884 	RF_ConfigSet_t *cset;
   2885 	RF_AutoConfig_t *ac;
   2886 {
   2887 	RF_ComponentLabel_t *clabel1, *clabel2;
   2888 
   2889 	/* If this one matches the *first* one in the set, that's good
   2890 	   enough, since the other members of the set would have been
   2891 	   through here too... */
   2892 	/* note that we are not checking partitionSize here..
   2893 
   2894 	   Note that we are also not checking the mod_counters here.
   2895 	   If everything else matches execpt the mod_counter, that's
   2896 	   good enough for this test.  We will deal with the mod_counters
   2897 	   a little later in the autoconfiguration process.
   2898 
   2899 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2900 
   2901 	   The reason we don't check for this is that failed disks
   2902 	   will have lower modification counts.  If those disks are
   2903 	   not added to the set they used to belong to, then they will
   2904 	   form their own set, which may result in 2 different sets,
   2905 	   for example, competing to be configured at raid0, and
   2906 	   perhaps competing to be the root filesystem set.  If the
   2907 	   wrong ones get configured, or both attempt to become /,
   2908 	   weird behaviour and or serious lossage will occur.  Thus we
   2909 	   need to bring them into the fold here, and kick them out at
   2910 	   a later point.
   2911 
   2912 	*/
   2913 
   2914 	clabel1 = cset->ac->clabel;
   2915 	clabel2 = ac->clabel;
   2916 	if ((clabel1->version == clabel2->version) &&
   2917 	    (clabel1->serial_number == clabel2->serial_number) &&
   2918 	    (clabel1->num_rows == clabel2->num_rows) &&
   2919 	    (clabel1->num_columns == clabel2->num_columns) &&
   2920 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2921 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2922 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2923 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2924 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2925 	    (clabel1->blockSize == clabel2->blockSize) &&
   2926 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2927 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2928 	    (clabel1->root_partition == clabel2->root_partition) &&
   2929 	    (clabel1->last_unit == clabel2->last_unit) &&
   2930 	    (clabel1->config_order == clabel2->config_order)) {
   2931 		/* if it get's here, it almost *has* to be a match */
   2932 	} else {
   2933 		/* it's not consistent with somebody in the set..
   2934 		   punt */
   2935 		return(0);
   2936 	}
   2937 	/* all was fine.. it must fit... */
   2938 	return(1);
   2939 }
   2940 
   2941 int
   2942 rf_have_enough_components(cset)
   2943 	RF_ConfigSet_t *cset;
   2944 {
   2945 	RF_AutoConfig_t *ac;
   2946 	RF_AutoConfig_t *auto_config;
   2947 	RF_ComponentLabel_t *clabel;
   2948 	int r,c;
   2949 	int num_rows;
   2950 	int num_cols;
   2951 	int num_missing;
   2952 	int mod_counter;
   2953 	int mod_counter_found;
   2954 	int even_pair_failed;
   2955 	char parity_type;
   2956 
   2957 
   2958 	/* check to see that we have enough 'live' components
   2959 	   of this set.  If so, we can configure it if necessary */
   2960 
   2961 	num_rows = cset->ac->clabel->num_rows;
   2962 	num_cols = cset->ac->clabel->num_columns;
   2963 	parity_type = cset->ac->clabel->parityConfig;
   2964 
   2965 	/* XXX Check for duplicate components!?!?!? */
   2966 
   2967 	/* Determine what the mod_counter is supposed to be for this set. */
   2968 
   2969 	mod_counter_found = 0;
   2970 	mod_counter = 0;
   2971 	ac = cset->ac;
   2972 	while(ac!=NULL) {
   2973 		if (mod_counter_found==0) {
   2974 			mod_counter = ac->clabel->mod_counter;
   2975 			mod_counter_found = 1;
   2976 		} else {
   2977 			if (ac->clabel->mod_counter > mod_counter) {
   2978 				mod_counter = ac->clabel->mod_counter;
   2979 			}
   2980 		}
   2981 		ac = ac->next;
   2982 	}
   2983 
   2984 	num_missing = 0;
   2985 	auto_config = cset->ac;
   2986 
   2987 	for(r=0; r<num_rows; r++) {
   2988 		even_pair_failed = 0;
   2989 		for(c=0; c<num_cols; c++) {
   2990 			ac = auto_config;
   2991 			while(ac!=NULL) {
   2992 				if ((ac->clabel->row == r) &&
   2993 				    (ac->clabel->column == c) &&
   2994 				    (ac->clabel->mod_counter == mod_counter)) {
   2995 					/* it's this one... */
   2996 #if DEBUG
   2997 					printf("Found: %s at %d,%d\n",
   2998 					       ac->devname,r,c);
   2999 #endif
   3000 					break;
   3001 				}
   3002 				ac=ac->next;
   3003 			}
   3004 			if (ac==NULL) {
   3005 				/* Didn't find one here! */
   3006 				/* special case for RAID 1, especially
   3007 				   where there are more than 2
   3008 				   components (where RAIDframe treats
   3009 				   things a little differently :( ) */
   3010 				if (parity_type == '1') {
   3011 					if (c%2 == 0) { /* even component */
   3012 						even_pair_failed = 1;
   3013 					} else { /* odd component.  If
   3014                                                     we're failed, and
   3015                                                     so is the even
   3016                                                     component, it's
   3017                                                     "Good Night, Charlie" */
   3018 						if (even_pair_failed == 1) {
   3019 							return(0);
   3020 						}
   3021 					}
   3022 				} else {
   3023 					/* normal accounting */
   3024 					num_missing++;
   3025 				}
   3026 			}
   3027 			if ((parity_type == '1') && (c%2 == 1)) {
   3028 				/* Just did an even component, and we didn't
   3029 				   bail.. reset the even_pair_failed flag,
   3030 				   and go on to the next component.... */
   3031 				even_pair_failed = 0;
   3032 			}
   3033 		}
   3034 	}
   3035 
   3036 	clabel = cset->ac->clabel;
   3037 
   3038 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3039 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3040 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3041 		/* XXX this needs to be made *much* more general */
   3042 		/* Too many failures */
   3043 		return(0);
   3044 	}
   3045 	/* otherwise, all is well, and we've got enough to take a kick
   3046 	   at autoconfiguring this set */
   3047 	return(1);
   3048 }
   3049 
   3050 void
   3051 rf_create_configuration(ac,config,raidPtr)
   3052 	RF_AutoConfig_t *ac;
   3053 	RF_Config_t *config;
   3054 	RF_Raid_t *raidPtr;
   3055 {
   3056 	RF_ComponentLabel_t *clabel;
   3057 	int i;
   3058 
   3059 	clabel = ac->clabel;
   3060 
   3061 	/* 1. Fill in the common stuff */
   3062 	config->numRow = clabel->num_rows;
   3063 	config->numCol = clabel->num_columns;
   3064 	config->numSpare = 0; /* XXX should this be set here? */
   3065 	config->sectPerSU = clabel->sectPerSU;
   3066 	config->SUsPerPU = clabel->SUsPerPU;
   3067 	config->SUsPerRU = clabel->SUsPerRU;
   3068 	config->parityConfig = clabel->parityConfig;
   3069 	/* XXX... */
   3070 	strcpy(config->diskQueueType,"fifo");
   3071 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3072 	config->layoutSpecificSize = 0; /* XXX ?? */
   3073 
   3074 	while(ac!=NULL) {
   3075 		/* row/col values will be in range due to the checks
   3076 		   in reasonable_label() */
   3077 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3078 		       ac->devname);
   3079 		ac = ac->next;
   3080 	}
   3081 
   3082 	for(i=0;i<RF_MAXDBGV;i++) {
   3083 		config->debugVars[i][0] = NULL;
   3084 	}
   3085 }
   3086 
   3087 int
   3088 rf_set_autoconfig(raidPtr, new_value)
   3089 	RF_Raid_t *raidPtr;
   3090 	int new_value;
   3091 {
   3092 	RF_ComponentLabel_t clabel;
   3093 	struct vnode *vp;
   3094 	dev_t dev;
   3095 	int row, column;
   3096 
   3097 	raidPtr->autoconfigure = new_value;
   3098 	for(row=0; row<raidPtr->numRow; row++) {
   3099 		for(column=0; column<raidPtr->numCol; column++) {
   3100 			if (raidPtr->Disks[row][column].status ==
   3101 			    rf_ds_optimal) {
   3102 				dev = raidPtr->Disks[row][column].dev;
   3103 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3104 				raidread_component_label(dev, vp, &clabel);
   3105 				clabel.autoconfigure = new_value;
   3106 				raidwrite_component_label(dev, vp, &clabel);
   3107 			}
   3108 		}
   3109 	}
   3110 	return(new_value);
   3111 }
   3112 
   3113 int
   3114 rf_set_rootpartition(raidPtr, new_value)
   3115 	RF_Raid_t *raidPtr;
   3116 	int new_value;
   3117 {
   3118 	RF_ComponentLabel_t clabel;
   3119 	struct vnode *vp;
   3120 	dev_t dev;
   3121 	int row, column;
   3122 
   3123 	raidPtr->root_partition = new_value;
   3124 	for(row=0; row<raidPtr->numRow; row++) {
   3125 		for(column=0; column<raidPtr->numCol; column++) {
   3126 			if (raidPtr->Disks[row][column].status ==
   3127 			    rf_ds_optimal) {
   3128 				dev = raidPtr->Disks[row][column].dev;
   3129 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3130 				raidread_component_label(dev, vp, &clabel);
   3131 				clabel.root_partition = new_value;
   3132 				raidwrite_component_label(dev, vp, &clabel);
   3133 			}
   3134 		}
   3135 	}
   3136 	return(new_value);
   3137 }
   3138 
   3139 void
   3140 rf_release_all_vps(cset)
   3141 	RF_ConfigSet_t *cset;
   3142 {
   3143 	RF_AutoConfig_t *ac;
   3144 
   3145 	ac = cset->ac;
   3146 	while(ac!=NULL) {
   3147 		/* Close the vp, and give it back */
   3148 		if (ac->vp) {
   3149 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3150 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3151 			vput(ac->vp);
   3152 			ac->vp = NULL;
   3153 		}
   3154 		ac = ac->next;
   3155 	}
   3156 }
   3157 
   3158 
   3159 void
   3160 rf_cleanup_config_set(cset)
   3161 	RF_ConfigSet_t *cset;
   3162 {
   3163 	RF_AutoConfig_t *ac;
   3164 	RF_AutoConfig_t *next_ac;
   3165 
   3166 	ac = cset->ac;
   3167 	while(ac!=NULL) {
   3168 		next_ac = ac->next;
   3169 		/* nuke the label */
   3170 		free(ac->clabel, M_RAIDFRAME);
   3171 		/* cleanup the config structure */
   3172 		free(ac, M_RAIDFRAME);
   3173 		/* "next.." */
   3174 		ac = next_ac;
   3175 	}
   3176 	/* and, finally, nuke the config set */
   3177 	free(cset, M_RAIDFRAME);
   3178 }
   3179 
   3180 
   3181 void
   3182 raid_init_component_label(raidPtr, clabel)
   3183 	RF_Raid_t *raidPtr;
   3184 	RF_ComponentLabel_t *clabel;
   3185 {
   3186 	/* current version number */
   3187 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3188 	clabel->serial_number = raidPtr->serial_number;
   3189 	clabel->mod_counter = raidPtr->mod_counter;
   3190 	clabel->num_rows = raidPtr->numRow;
   3191 	clabel->num_columns = raidPtr->numCol;
   3192 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3193 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3194 
   3195 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3196 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3197 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3198 
   3199 	clabel->blockSize = raidPtr->bytesPerSector;
   3200 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3201 
   3202 	/* XXX not portable */
   3203 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3204 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3205 	clabel->autoconfigure = raidPtr->autoconfigure;
   3206 	clabel->root_partition = raidPtr->root_partition;
   3207 	clabel->last_unit = raidPtr->raidid;
   3208 	clabel->config_order = raidPtr->config_order;
   3209 }
   3210 
   3211 int
   3212 rf_auto_config_set(cset,unit)
   3213 	RF_ConfigSet_t *cset;
   3214 	int *unit;
   3215 {
   3216 	RF_Raid_t *raidPtr;
   3217 	RF_Config_t *config;
   3218 	int raidID;
   3219 	int retcode;
   3220 
   3221 #if DEBUG
   3222 	printf("RAID autoconfigure\n");
   3223 #endif
   3224 
   3225 	retcode = 0;
   3226 	*unit = -1;
   3227 
   3228 	/* 1. Create a config structure */
   3229 
   3230 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3231 				       M_RAIDFRAME,
   3232 				       M_NOWAIT);
   3233 	if (config==NULL) {
   3234 		printf("Out of mem!?!?\n");
   3235 				/* XXX do something more intelligent here. */
   3236 		return(1);
   3237 	}
   3238 
   3239 	memset(config, 0, sizeof(RF_Config_t));
   3240 
   3241 	/*
   3242 	   2. Figure out what RAID ID this one is supposed to live at
   3243 	   See if we can get the same RAID dev that it was configured
   3244 	   on last time..
   3245 	*/
   3246 
   3247 	raidID = cset->ac->clabel->last_unit;
   3248 	if ((raidID < 0) || (raidID >= numraid)) {
   3249 		/* let's not wander off into lala land. */
   3250 		raidID = numraid - 1;
   3251 	}
   3252 	if (raidPtrs[raidID]->valid != 0) {
   3253 
   3254 		/*
   3255 		   Nope... Go looking for an alternative...
   3256 		   Start high so we don't immediately use raid0 if that's
   3257 		   not taken.
   3258 		*/
   3259 
   3260 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3261 			if (raidPtrs[raidID]->valid == 0) {
   3262 				/* can use this one! */
   3263 				break;
   3264 			}
   3265 		}
   3266 	}
   3267 
   3268 	if (raidID < 0) {
   3269 		/* punt... */
   3270 		printf("Unable to auto configure this set!\n");
   3271 		printf("(Out of RAID devs!)\n");
   3272 		return(1);
   3273 	}
   3274 
   3275 #if DEBUG
   3276 	printf("Configuring raid%d:\n",raidID);
   3277 #endif
   3278 
   3279 	raidPtr = raidPtrs[raidID];
   3280 
   3281 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3282 	raidPtr->raidid = raidID;
   3283 	raidPtr->openings = RAIDOUTSTANDING;
   3284 
   3285 	/* 3. Build the configuration structure */
   3286 	rf_create_configuration(cset->ac, config, raidPtr);
   3287 
   3288 	/* 4. Do the configuration */
   3289 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3290 
   3291 	if (retcode == 0) {
   3292 
   3293 		raidinit(raidPtrs[raidID]);
   3294 
   3295 		rf_markalldirty(raidPtrs[raidID]);
   3296 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3297 		if (cset->ac->clabel->root_partition==1) {
   3298 			/* everything configured just fine.  Make a note
   3299 			   that this set is eligible to be root. */
   3300 			cset->rootable = 1;
   3301 			/* XXX do this here? */
   3302 			raidPtrs[raidID]->root_partition = 1;
   3303 		}
   3304 	}
   3305 
   3306 	/* 5. Cleanup */
   3307 	free(config, M_RAIDFRAME);
   3308 
   3309 	*unit = raidID;
   3310 	return(retcode);
   3311 }
   3312 
   3313 void
   3314 rf_disk_unbusy(desc)
   3315 	RF_RaidAccessDesc_t *desc;
   3316 {
   3317 	struct buf *bp;
   3318 
   3319 	bp = (struct buf *)desc->bp;
   3320 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3321 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3322 }
   3323