Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.55
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.55 2000/02/23 02:11:05 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 
    137 #include "raid.h"
    138 #include "rf_raid.h"
    139 #include "rf_raidframe.h"
    140 #include "rf_copyback.h"
    141 #include "rf_dag.h"
    142 #include "rf_dagflags.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_acctrace.h"
    145 #include "rf_etimer.h"
    146 #include "rf_general.h"
    147 #include "rf_debugMem.h"
    148 #include "rf_kintf.h"
    149 #include "rf_options.h"
    150 #include "rf_driver.h"
    151 #include "rf_parityscan.h"
    152 #include "rf_debugprint.h"
    153 #include "rf_threadstuff.h"
    154 
    155 int     rf_kdebug_level = 0;
    156 
    157 #ifdef DEBUG
    158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    159 #else				/* DEBUG */
    160 #define db1_printf(a) { }
    161 #endif				/* DEBUG */
    162 
    /* Table of per-unit RAID descriptors, indexed by unit number; the table
     * and each entry are allocated in raidattach(). */
    163 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    164 
    /* Initialized in raidattach() with rf_mutex_init().
     * NOTE(review): presumably protects the two spare-table queues below --
     * confirm against the code that uses them. */
    165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    166 
    /* Both queue heads are reset to NULL in raidattach(). */
    167 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    168 						 * spare table */
    169 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    170 						 * installation process */
    171 
    172 /* prototypes */
    /* Helpers private to this file.  raidinit() is called from the
     * RAIDFRAME_CONFIGURE case of raidioctl() after rf_Configure() succeeds. */
    173 static void KernelWakeupFunc(struct buf * bp);
    174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    175 		   dev_t dev, RF_SectorNum_t startSect,
    176 		   RF_SectorCount_t numSect, caddr_t buf,
    177 		   void (*cbFunc) (struct buf *), void *cbArg,
    178 		   int logBytesPerSector, struct proc * b_proc);
    179 static int raidinit __P((dev_t, RF_Raid_t *, int));
    180 
    /* Externally visible driver routines: attach, size, open/close, ioctl,
     * write/read, strategy and dump. */
    181 void raidattach __P((int));
    182 int raidsize __P((dev_t));
    183 int raidopen __P((dev_t, int, int, struct proc *));
    184 int raidclose __P((dev_t, int, int, struct proc *));
    185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    186 int raidwrite __P((dev_t, struct uio *, int));
    187 int raidread __P((dev_t, struct uio *, int));
    188 void raidstrategy __P((struct buf *));
    189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    190 
    191 /*
    192  * Pilfered from ccd.c
    193  */
    194 
    /*
     * Wrapper around a struct buf that also records the original buf and
     * the RAIDframe disk-queue request it belongs to.
     * NOTE(review): rf_buf must stay first presumably so that a
     * struct buf * can be cast back to a struct raidbuf * -- confirm
     * against the completion callback.
     */
    195 struct raidbuf {
    196 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    197 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    198 	int     rf_flags;	/* misc. flags */
    199 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    200 };
    201 
    202 
    /*
     * Take a struct raidbuf from / return it to the unit's sc_cbufpool.
     * RAIDGETBUF passes PR_NOWAIT.
     * NOTE(review): presumably it can therefore return NULL and callers
     * must check -- confirm.
     */
    203 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
    204 #define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
    205 
    206 /* XXX Not sure if the following should be replacing the raidPtrs above,
    207    or if it should be used in conjunction with that...
    208    Note: Don't use sc_dev until the raidinit(0,_,_) call in
    209    rf_auto_config_set() actually passes in a real dev_t!  */
    210 
    /*
     * Per-unit driver state.  The global raid_softc array holds one of
     * these per unit; it is allocated and zeroed in raidattach() and is
     * indexed by raidunit(dev) in the entry points.
     */
    211 struct raid_softc {
    212 	int     sc_flags;	/* flags */
    213 	int     sc_cflags;	/* configuration flags */
    214 	size_t  sc_size;        /* size of the raid device */
    215 	dev_t   sc_dev;	        /* our device.. */
    216 	char    sc_xname[20];	/* XXX external name */
    217 	struct disk sc_dkdev;	/* generic disk device info */
    218 	struct pool sc_cbufpool;	/* component buffer pool */
    219 	struct buf_queue buf_queue;	/* used for the device queue */
    220 };
    221 /* sc_flags */
    222 #define RAIDF_INITED	0x01	/* unit has been initialized */
    223 #define RAIDF_WLABEL	0x02	/* label area is writable */
    224 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    225 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    226 #define RAIDF_LOCKED	0x80	/* unit is locked */
    227 
    /* Unit number encoded in a dev_t. */
    228 #define	raidunit(x)	DISKUNIT(x)
    /* Number of usable units.  Set by raidattach(); every entry point
     * rejects unit numbers that are >= numraid. */
    229 int numraid = 0;
    230 
    231 /*
    232  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    233  * Be aware that large numbers can allow the driver to consume a lot of
    234  * kernel memory, especially on writes, and in degraded mode reads.
    235  *
    236  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    237  * a single 64K write will typically require 64K for the old data,
    238  * 64K for the old parity, and 64K for the new parity, for a total
    239  * of 192K (if the parity buffer is not re-used immediately).
    240  * Even if it is used immediately, that's still 128K, which when multiplied
    241  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    242  *
    243  * Now in degraded mode, for example, a 64K read on the above setup may
    244  * require data reconstruction, which will require *all* of the 4 remaining
    245  * disks to participate -- 4 * 32K/disk == 128K again.
    246  */
    247 
    /* Default number of simultaneous I/Os per RAID device; the #ifndef lets
     * a build-time definition override it.  raidioctl() stores this value in
     * raidPtr->openings after a successful configuration. */
    248 #ifndef RAIDOUTSTANDING
    249 #define RAIDOUTSTANDING   6
    250 #endif
    251 
    /* dev_t naming the raw partition (RAW_PART) of the same major/unit
     * as `dev'. */
    252 #define RAIDLABELDEV(dev)	\
    253 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    254 
    255 /* declared here, and made public, for the benefit of KVM stuff.. */
    256 struct raid_softc *raid_softc;
    257 
    /* Disklabel helpers; raidgetdisklabel() is called from raidopen() on the
     * first open of an initialized unit. */
    258 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    259 				     struct disklabel *));
    260 static void raidgetdisklabel __P((dev_t));
    261 static void raidmakedisklabel __P((struct raid_softc *));
    262 
    /* Per-unit lock used by raidopen() and raidclose(); raidlock() returns
     * non-zero on failure. */
    263 static int raidlock __P((struct raid_softc *));
    264 static void raidunlock __P((struct raid_softc *));
    265 
    266 static void rf_markalldirty __P((RF_Raid_t *));
    267 void rf_mountroot_hook __P((struct device *));
    268 
    /* Array of `num' struct device entries, allocated and filled in by
     * raidattach(); rf_buildroothack() may point booted_device at one. */
    269 struct device *raidrootdev;
    270 struct cfdata cf_raidrootdev;
    271 struct cfdriver cfdrv;
    272 /* XXX these should be moved up */
    273 #include "rf_configure.h"
    274 #include <sys/reboot.h>
    275 
    276 void rf_ReconThread __P((struct rf_recon_req *));
    277 /* XXX what I want is: */
    278 /*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
    279 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
    280 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
    281 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    282 void rf_buildroothack __P((void *));
    283 
    /* Autoconfiguration support used by raidattach() and
     * rf_buildroothack(). */
    284 RF_AutoConfig_t *rf_find_raid_components __P((void));
    285 void print_component_label __P((RF_ComponentLabel_t *));
    286 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
    287 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
    288 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
    289 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
    290 				  RF_Raid_t *));
    291 int rf_set_autoconfig __P((RF_Raid_t *, int));
    292 int rf_set_rootpartition __P((RF_Raid_t *, int));
    293 void rf_release_all_vps __P((RF_ConfigSet_t *));
    294 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
    295 int rf_have_enough_components __P((RF_ConfigSet_t *));
    296 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
    297 
    /* Checked at the end of raidattach(): the component search only runs
     * when this is non-zero. */
    298 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    299 				  allow autoconfig to take place */
    300 /* XXX ugly hack. */
    301 const char *raid_rooty = "raid0";
    302 extern struct device *booted_device;
    303 
    304 void
    305 raidattach(num)
    306 	int     num;
    307 {
    308 	int raidID;
    309 	int i, rc;
    310 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    311 	RF_ConfigSet_t *config_sets;
    312 
    313 #ifdef DEBUG
    314 	printf("raidattach: Asked for %d units\n", num);
    315 #endif
    316 
    317 	if (num <= 0) {
    318 #ifdef DIAGNOSTIC
    319 		panic("raidattach: count <= 0");
    320 #endif
    321 		return;
    322 	}
    323 	/* This is where all the initialization stuff gets done. */
    324 
    325 	numraid = num;
    326 
    327 	/* Make some space for requested number of units... */
    328 
    329 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    330 	if (raidPtrs == NULL) {
    331 		panic("raidPtrs is NULL!!\n");
    332 	}
    333 
    334 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    335 	if (rc) {
    336 		RF_PANIC();
    337 	}
    338 
    339 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    340 
    341 	for (i = 0; i < numraid; i++)
    342 		raidPtrs[i] = NULL;
    343 	rc = rf_BootRaidframe();
    344 	if (rc == 0)
    345 		printf("Kernelized RAIDframe activated\n");
    346 	else
    347 		panic("Serious error booting RAID!!\n");
    348 
    349 	/* put together some datastructures like the CCD device does.. This
    350 	 * lets us lock the device and what-not when it gets opened. */
    351 
    352 	raid_softc = (struct raid_softc *)
    353 		malloc(num * sizeof(struct raid_softc),
    354 		       M_RAIDFRAME, M_NOWAIT);
    355 	if (raid_softc == NULL) {
    356 		printf("WARNING: no memory for RAIDframe driver\n");
    357 		return;
    358 	}
    359 
    360 	bzero(raid_softc, num * sizeof(struct raid_softc));
    361 
    362 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    363 					      M_RAIDFRAME, M_NOWAIT);
    364 	if (raidrootdev == NULL) {
    365 		panic("No memory for RAIDframe driver!!?!?!\n");
    366 	}
    367 
    368 	for (raidID = 0; raidID < num; raidID++) {
    369 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    370 
    371 		raidrootdev[raidID].dv_class  = DV_DISK;
    372 		raidrootdev[raidID].dv_cfdata = NULL;
    373 		raidrootdev[raidID].dv_unit   = raidID;
    374 		raidrootdev[raidID].dv_parent = NULL;
    375 		raidrootdev[raidID].dv_flags  = 0;
    376 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    377 
    378 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    379 			  (RF_Raid_t *));
    380 		if (raidPtrs[raidID] == NULL) {
    381 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    382 			numraid = raidID;
    383 			return;
    384 		}
    385 	}
    386 
    387 if (raidautoconfig) {
    388 	/* 1. locate all RAID components on the system */
    389 
    390 #if DEBUG
    391 	printf("Searching for raid components...\n");
    392 #endif
    393 	ac_list = rf_find_raid_components();
    394 
    395 	/* 2. sort them into their respective sets */
    396 
    397 	config_sets = rf_create_auto_sets(ac_list);
    398 
    399 	/* 3. evaluate each set and configure the valid ones
    400 	   This gets done in rf_buildroothack() */
    401 
    402 	/* schedule the creation of the thread to do the
    403 	   "/ on RAID" stuff */
    404 
    405 	kthread_create(rf_buildroothack,config_sets);
    406 
    407 	/* 4. make sure we get our mud.. I mean root.. hooks in.. */
    408 	/* XXXX pick raid0 for now... and this should be only done
    409 	   if we find something that's bootable!!! */
    410 #if 0
    411 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    412 #endif
    413 	if (boothowto & RB_ASKNAME) {
    414 		/* We don't auto-config... */
    415 	} else {
    416 		/* They didn't ask, and we found something bootable... */
    417 		/* XXX pretend for now.. */
    418 #if 0
    419  		booted_device = &raidrootdev[0];
    420 #endif
    421 	}
    422 }
    423 
    424 }
    425 
    426 void
    427 rf_buildroothack(arg)
    428 	void *arg;
    429 {
    430 	RF_ConfigSet_t *config_sets = arg;
    431 	RF_ConfigSet_t *cset;
    432 	RF_ConfigSet_t *next_cset;
    433 	int retcode;
    434 	int raidID;
    435 	int rootID;
    436 	int num_root;
    437 
    438 	num_root = 0;
    439 	cset = config_sets;
    440 	while(cset != NULL ) {
    441 		next_cset = cset->next;
    442 		if (rf_have_enough_components(cset) &&
    443 		    cset->ac->clabel->autoconfigure==1) {
    444 			retcode = rf_auto_config_set(cset,&raidID);
    445 			if (!retcode) {
    446 				if (cset->rootable) {
    447 					rootID = raidID;
    448 					num_root++;
    449 				}
    450 			} else {
    451 				/* The autoconfig didn't work :( */
    452 #if DEBUG
    453 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    454 #endif
    455 				rf_release_all_vps(cset);
    456 #if DEBUG
    457 				printf("Done cleanup\n");
    458 #endif
    459 			}
    460 		} else {
    461 			/* we're not autoconfiguring this set...
    462 			   release the associated resources */
    463 #if DEBUG
    464 			printf("Releasing vp's\n");
    465 #endif
    466 			rf_release_all_vps(cset);
    467 #if DEBUG
    468 			printf("Done.\n");
    469 #endif
    470 		}
    471 		/* cleanup */
    472 #if DEBUG
    473 		printf("Cleaning up config set\n");
    474 #endif
    475 		rf_cleanup_config_set(cset);
    476 #if DEBUG
    477 		printf("Done cleanup\n");
    478 #endif
    479 		cset = next_cset;
    480 	}
    481 	if (boothowto & RB_ASKNAME) {
    482 		/* We don't auto-config... */
    483 	} else {
    484 		/* They didn't ask, and we found something bootable... */
    485 		/* XXX pretend for now.. */
    486 		if (num_root == 1) {
    487 #if 1
    488 			booted_device = &raidrootdev[rootID];
    489 #endif
    490 		} else if (num_root > 1) {
    491 			/* we can't guess.. require the user to answer... */
    492 			boothowto |= RB_ASKNAME;
    493 		}
    494 	}
    495 }
    496 
    497 
    498 int
    499 raidsize(dev)
    500 	dev_t   dev;
    501 {
    502 	struct raid_softc *rs;
    503 	struct disklabel *lp;
    504 	int     part, unit, omask, size;
    505 
    506 	unit = raidunit(dev);
    507 	if (unit >= numraid)
    508 		return (-1);
    509 	rs = &raid_softc[unit];
    510 
    511 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    512 		return (-1);
    513 
    514 	part = DISKPART(dev);
    515 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    516 	lp = rs->sc_dkdev.dk_label;
    517 
    518 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    519 		return (-1);
    520 
    521 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    522 		size = -1;
    523 	else
    524 		size = lp->d_partitions[part].p_size *
    525 		    (lp->d_secsize / DEV_BSIZE);
    526 
    527 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    528 		return (-1);
    529 
    530 	return (size);
    531 
    532 }
    533 
    534 int
    535 raiddump(dev, blkno, va, size)
    536 	dev_t   dev;
    537 	daddr_t blkno;
    538 	caddr_t va;
    539 	size_t  size;
    540 {
    541 	/* Not implemented. */
    542 	return ENXIO;
    543 }
    544 /* ARGSUSED */
    545 int
    546 raidopen(dev, flags, fmt, p)
    547 	dev_t   dev;
    548 	int     flags, fmt;
    549 	struct proc *p;
    550 {
    551 	int     unit = raidunit(dev);
    552 	struct raid_softc *rs;
    553 	struct disklabel *lp;
    554 	int     part, pmask;
    555 	int     error = 0;
    556 
    557 	if (unit >= numraid)
    558 		return (ENXIO);
    559 	rs = &raid_softc[unit];
    560 
    561 	if ((error = raidlock(rs)) != 0)
    562 		return (error);
    563 	lp = rs->sc_dkdev.dk_label;
    564 
    565 	part = DISKPART(dev);
    566 	pmask = (1 << part);
    567 
    568 	db1_printf(("Opening raid device number: %d partition: %d\n",
    569 		unit, part));
    570 
    571 
    572 	if ((rs->sc_flags & RAIDF_INITED) &&
    573 	    (rs->sc_dkdev.dk_openmask == 0))
    574 		raidgetdisklabel(dev);
    575 
    576 	/* make sure that this partition exists */
    577 
    578 	if (part != RAW_PART) {
    579 		db1_printf(("Not a raw partition..\n"));
    580 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    581 		    ((part >= lp->d_npartitions) ||
    582 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    583 			error = ENXIO;
    584 			raidunlock(rs);
    585 			db1_printf(("Bailing out...\n"));
    586 			return (error);
    587 		}
    588 	}
    589 	/* Prevent this unit from being unconfigured while open. */
    590 	switch (fmt) {
    591 	case S_IFCHR:
    592 		rs->sc_dkdev.dk_copenmask |= pmask;
    593 		break;
    594 
    595 	case S_IFBLK:
    596 		rs->sc_dkdev.dk_bopenmask |= pmask;
    597 		break;
    598 	}
    599 
    600 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    601 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    602 		/* First one... mark things as dirty... Note that we *MUST*
    603 		 have done a configure before this.  I DO NOT WANT TO BE
    604 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    605 		 THAT THEY BELONG TOGETHER!!!!! */
    606 		/* XXX should check to see if we're only open for reading
    607 		   here... If so, we needn't do this, but then need some
    608 		   other way of keeping track of what's happened.. */
    609 
    610 		rf_markalldirty( raidPtrs[unit] );
    611 	}
    612 
    613 
    614 	rs->sc_dkdev.dk_openmask =
    615 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    616 
    617 	raidunlock(rs);
    618 
    619 	return (error);
    620 
    621 
    622 }
    623 /* ARGSUSED */
    624 int
    625 raidclose(dev, flags, fmt, p)
    626 	dev_t   dev;
    627 	int     flags, fmt;
    628 	struct proc *p;
    629 {
    630 	int     unit = raidunit(dev);
    631 	struct raid_softc *rs;
    632 	int     error = 0;
    633 	int     part;
    634 
    635 	if (unit >= numraid)
    636 		return (ENXIO);
    637 	rs = &raid_softc[unit];
    638 
    639 	if ((error = raidlock(rs)) != 0)
    640 		return (error);
    641 
    642 	part = DISKPART(dev);
    643 
    644 	/* ...that much closer to allowing unconfiguration... */
    645 	switch (fmt) {
    646 	case S_IFCHR:
    647 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    648 		break;
    649 
    650 	case S_IFBLK:
    651 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    652 		break;
    653 	}
    654 	rs->sc_dkdev.dk_openmask =
    655 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    656 
    657 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    658 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    659 		/* Last one... device is not unconfigured yet.
    660 		   Device shutdown has taken care of setting the
    661 		   clean bits if RAIDF_INITED is not set
    662 		   mark things as clean... */
    663 #ifdef DEBUG
    664 		printf("Last one on raid%d.  Updating status.\n",unit);
    665 #endif
    666 		rf_update_component_labels( raidPtrs[unit] );
    667 	}
    668 
    669 	raidunlock(rs);
    670 	return (0);
    671 
    672 }
    673 
    674 void
    675 raidstrategy(bp)
    676 	register struct buf *bp;
    677 {
    678 	register int s;
    679 
    680 	unsigned int raidID = raidunit(bp->b_dev);
    681 	RF_Raid_t *raidPtr;
    682 	struct raid_softc *rs = &raid_softc[raidID];
    683 	struct disklabel *lp;
    684 	int     wlabel;
    685 
    686 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    687 		bp->b_error = ENXIO;
    688 		bp->b_flags = B_ERROR;
    689 		bp->b_resid = bp->b_bcount;
    690 		biodone(bp);
    691 		return;
    692 	}
    693 	if (raidID >= numraid || !raidPtrs[raidID]) {
    694 		bp->b_error = ENODEV;
    695 		bp->b_flags |= B_ERROR;
    696 		bp->b_resid = bp->b_bcount;
    697 		biodone(bp);
    698 		return;
    699 	}
    700 	raidPtr = raidPtrs[raidID];
    701 	if (!raidPtr->valid) {
    702 		bp->b_error = ENODEV;
    703 		bp->b_flags |= B_ERROR;
    704 		bp->b_resid = bp->b_bcount;
    705 		biodone(bp);
    706 		return;
    707 	}
    708 	if (bp->b_bcount == 0) {
    709 		db1_printf(("b_bcount is zero..\n"));
    710 		biodone(bp);
    711 		return;
    712 	}
    713 	lp = rs->sc_dkdev.dk_label;
    714 
    715 	/*
    716 	 * Do bounds checking and adjust transfer.  If there's an
    717 	 * error, the bounds check will flag that for us.
    718 	 */
    719 
    720 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    721 	if (DISKPART(bp->b_dev) != RAW_PART)
    722 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    723 			db1_printf(("Bounds check failed!!:%d %d\n",
    724 				(int) bp->b_blkno, (int) wlabel));
    725 			biodone(bp);
    726 			return;
    727 		}
    728 	s = splbio();
    729 
    730 	bp->b_resid = 0;
    731 
    732 	/* stuff it onto our queue */
    733 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    734 
    735 	raidstart(raidPtrs[raidID]);
    736 
    737 	splx(s);
    738 }
    739 /* ARGSUSED */
    740 int
    741 raidread(dev, uio, flags)
    742 	dev_t   dev;
    743 	struct uio *uio;
    744 	int     flags;
    745 {
    746 	int     unit = raidunit(dev);
    747 	struct raid_softc *rs;
    748 	int     part;
    749 
    750 	if (unit >= numraid)
    751 		return (ENXIO);
    752 	rs = &raid_softc[unit];
    753 
    754 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    755 		return (ENXIO);
    756 	part = DISKPART(dev);
    757 
    758 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    759 
    760 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    761 
    762 }
    763 /* ARGSUSED */
    764 int
    765 raidwrite(dev, uio, flags)
    766 	dev_t   dev;
    767 	struct uio *uio;
    768 	int     flags;
    769 {
    770 	int     unit = raidunit(dev);
    771 	struct raid_softc *rs;
    772 
    773 	if (unit >= numraid)
    774 		return (ENXIO);
    775 	rs = &raid_softc[unit];
    776 
    777 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    778 		return (ENXIO);
    779 	db1_printf(("raidwrite\n"));
    780 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    781 
    782 }
    783 
    784 int
    785 raidioctl(dev, cmd, data, flag, p)
    786 	dev_t   dev;
    787 	u_long  cmd;
    788 	caddr_t data;
    789 	int     flag;
    790 	struct proc *p;
    791 {
    792 	int     unit = raidunit(dev);
    793 	int     error = 0;
    794 	int     part, pmask;
    795 	struct raid_softc *rs;
    796 	RF_Config_t *k_cfg, *u_cfg;
    797 	RF_Raid_t *raidPtr;
    798 	RF_RaidDisk_t *diskPtr;
    799 	RF_AccTotals_t *totals;
    800 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    801 	u_char *specific_buf;
    802 	int retcode = 0;
    803 	int row;
    804 	int column;
    805 	struct rf_recon_req *rrcopy, *rr;
    806 	RF_ComponentLabel_t *clabel;
    807 	RF_ComponentLabel_t ci_label;
    808 	RF_ComponentLabel_t **clabel_ptr;
    809 	RF_SingleComponent_t *sparePtr,*componentPtr;
    810 	RF_SingleComponent_t hot_spare;
    811 	RF_SingleComponent_t component;
    812 	int i, j, d;
    813 
    814 	if (unit >= numraid)
    815 		return (ENXIO);
    816 	rs = &raid_softc[unit];
    817 	raidPtr = raidPtrs[unit];
    818 
    819 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    820 		(int) DISKPART(dev), (int) unit, (int) cmd));
    821 
    822 	/* Must be open for writes for these commands... */
    823 	switch (cmd) {
    824 	case DIOCSDINFO:
    825 	case DIOCWDINFO:
    826 	case DIOCWLABEL:
    827 		if ((flag & FWRITE) == 0)
    828 			return (EBADF);
    829 	}
    830 
    831 	/* Must be initialized for these... */
    832 	switch (cmd) {
    833 	case DIOCGDINFO:
    834 	case DIOCSDINFO:
    835 	case DIOCWDINFO:
    836 	case DIOCGPART:
    837 	case DIOCWLABEL:
    838 	case DIOCGDEFLABEL:
    839 	case RAIDFRAME_SHUTDOWN:
    840 	case RAIDFRAME_REWRITEPARITY:
    841 	case RAIDFRAME_GET_INFO:
    842 	case RAIDFRAME_RESET_ACCTOTALS:
    843 	case RAIDFRAME_GET_ACCTOTALS:
    844 	case RAIDFRAME_KEEP_ACCTOTALS:
    845 	case RAIDFRAME_GET_SIZE:
    846 	case RAIDFRAME_FAIL_DISK:
    847 	case RAIDFRAME_COPYBACK:
    848 	case RAIDFRAME_CHECK_RECON_STATUS:
    849 	case RAIDFRAME_GET_COMPONENT_LABEL:
    850 	case RAIDFRAME_SET_COMPONENT_LABEL:
    851 	case RAIDFRAME_ADD_HOT_SPARE:
    852 	case RAIDFRAME_REMOVE_HOT_SPARE:
    853 	case RAIDFRAME_INIT_LABELS:
    854 	case RAIDFRAME_REBUILD_IN_PLACE:
    855 	case RAIDFRAME_CHECK_PARITY:
    856 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    857 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    858 	case RAIDFRAME_SET_AUTOCONFIG:
    859 	case RAIDFRAME_SET_ROOT:
    860 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    861 			return (ENXIO);
    862 	}
    863 
    864 	switch (cmd) {
    865 
    866 		/* configure the system */
    867 	case RAIDFRAME_CONFIGURE:
    868 
    869 		if (raidPtr->valid) {
    870 			/* There is a valid RAID set running on this unit! */
    871 			printf("raid%d: Device already configured!\n",unit);
    872 		}
    873 
    874 		/* copy-in the configuration information */
    875 		/* data points to a pointer to the configuration structure */
    876 
    877 		u_cfg = *((RF_Config_t **) data);
    878 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    879 		if (k_cfg == NULL) {
    880 			return (ENOMEM);
    881 		}
    882 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    883 		    sizeof(RF_Config_t));
    884 		if (retcode) {
    885 			RF_Free(k_cfg, sizeof(RF_Config_t));
    886 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    887 				retcode));
    888 			return (retcode);
    889 		}
    890 		/* allocate a buffer for the layout-specific data, and copy it
    891 		 * in */
    892 		if (k_cfg->layoutSpecificSize) {
    893 			if (k_cfg->layoutSpecificSize > 10000) {
    894 				/* sanity check */
    895 				RF_Free(k_cfg, sizeof(RF_Config_t));
    896 				return (EINVAL);
    897 			}
    898 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    899 			    (u_char *));
    900 			if (specific_buf == NULL) {
    901 				RF_Free(k_cfg, sizeof(RF_Config_t));
    902 				return (ENOMEM);
    903 			}
    904 			retcode = copyin(k_cfg->layoutSpecific,
    905 			    (caddr_t) specific_buf,
    906 			    k_cfg->layoutSpecificSize);
    907 			if (retcode) {
    908 				RF_Free(k_cfg, sizeof(RF_Config_t));
    909 				RF_Free(specific_buf,
    910 					k_cfg->layoutSpecificSize);
    911 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    912 					retcode));
    913 				return (retcode);
    914 			}
    915 		} else
    916 			specific_buf = NULL;
    917 		k_cfg->layoutSpecific = specific_buf;
    918 
    919 		/* should do some kind of sanity check on the configuration.
    920 		 * Store the sum of all the bytes in the last byte? */
    921 
    922 		/* configure the system */
    923 
    924 		/*
    925 		 * Clear the entire RAID descriptor, just to make sure
    926 		 *  there is no stale data left in the case of a
    927 		 *  reconfiguration
    928 		 */
    929 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    930 		raidPtr->raidid = unit;
    931 
    932 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    933 
    934 		if (retcode == 0) {
    935 
    936 			/* allow this many simultaneous IO's to
    937 			   this RAID device */
    938 			raidPtr->openings = RAIDOUTSTANDING;
    939 
    940 			retcode = raidinit(dev, raidPtr, unit);
    941 			rf_markalldirty( raidPtr );
    942 		}
    943 		/* free the buffers.  No return code here. */
    944 		if (k_cfg->layoutSpecificSize) {
    945 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    946 		}
    947 		RF_Free(k_cfg, sizeof(RF_Config_t));
    948 
    949 		return (retcode);
    950 
    951 		/* shutdown the system */
    952 	case RAIDFRAME_SHUTDOWN:
    953 
    954 		if ((error = raidlock(rs)) != 0)
    955 			return (error);
    956 
    957 		/*
    958 		 * If somebody has a partition mounted, we shouldn't
    959 		 * shutdown.
    960 		 */
    961 
    962 		part = DISKPART(dev);
    963 		pmask = (1 << part);
    964 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    965 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    966 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    967 			raidunlock(rs);
    968 			return (EBUSY);
    969 		}
    970 
    971 		retcode = rf_Shutdown(raidPtr);
    972 
    973 		pool_destroy(&rs->sc_cbufpool);
    974 
    975 		/* It's no longer initialized... */
    976 		rs->sc_flags &= ~RAIDF_INITED;
    977 
    978 		/* Detach the disk. */
    979 		disk_detach(&rs->sc_dkdev);
    980 
    981 		raidunlock(rs);
    982 
    983 		return (retcode);
    984 	case RAIDFRAME_GET_COMPONENT_LABEL:
    985 		clabel_ptr = (RF_ComponentLabel_t **) data;
    986 		/* need to read the component label for the disk indicated
    987 		   by row,column in clabel */
    988 
    989 		/* For practice, let's get it directly from disk, rather
    990 		   than from the in-core copy */
    991 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    992 			   (RF_ComponentLabel_t *));
    993 		if (clabel == NULL)
    994 			return (ENOMEM);
    995 
    996 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    997 
    998 		retcode = copyin( *clabel_ptr, clabel,
    999 				  sizeof(RF_ComponentLabel_t));
   1000 
   1001 		if (retcode) {
   1002 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1003 			return(retcode);
   1004 		}
   1005 
   1006 		row = clabel->row;
   1007 		column = clabel->column;
   1008 
   1009 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1010 		    (column < 0) || (column >= raidPtr->numCol)) {
   1011 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1012 			return(EINVAL);
   1013 		}
   1014 
   1015 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1016 				raidPtr->raid_cinfo[row][column].ci_vp,
   1017 				clabel );
   1018 
   1019 		retcode = copyout((caddr_t) clabel,
   1020 				  (caddr_t) *clabel_ptr,
   1021 				  sizeof(RF_ComponentLabel_t));
   1022 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1023 		return (retcode);
   1024 
   1025 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1026 		clabel = (RF_ComponentLabel_t *) data;
   1027 
   1028 		/* XXX check the label for valid stuff... */
   1029 		/* Note that some things *should not* get modified --
   1030 		   the user should be re-initing the labels instead of
   1031 		   trying to patch things.
   1032 		   */
   1033 
   1034 		printf("Got component label:\n");
   1035 		printf("Version: %d\n",clabel->version);
   1036 		printf("Serial Number: %d\n",clabel->serial_number);
   1037 		printf("Mod counter: %d\n",clabel->mod_counter);
   1038 		printf("Row: %d\n", clabel->row);
   1039 		printf("Column: %d\n", clabel->column);
   1040 		printf("Num Rows: %d\n", clabel->num_rows);
   1041 		printf("Num Columns: %d\n", clabel->num_columns);
   1042 		printf("Clean: %d\n", clabel->clean);
   1043 		printf("Status: %d\n", clabel->status);
   1044 
   1045 		row = clabel->row;
   1046 		column = clabel->column;
   1047 
   1048 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1049 		    (column < 0) || (column >= raidPtr->numCol)) {
   1050 			return(EINVAL);
   1051 		}
   1052 
   1053 		/* XXX this isn't allowed to do anything for now :-) */
   1054 
   1055 		/* XXX and before it is, we need to fill in the rest
   1056 		   of the fields!?!?!?! */
   1057 #if 0
   1058 		raidwrite_component_label(
   1059                             raidPtr->Disks[row][column].dev,
   1060 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1061 			    clabel );
   1062 #endif
   1063 		return (0);
   1064 
   1065 	case RAIDFRAME_INIT_LABELS:
   1066 		clabel = (RF_ComponentLabel_t *) data;
   1067 		/*
   1068 		   we only want the serial number from
   1069 		   the above.  We get all the rest of the information
   1070 		   from the config that was used to create this RAID
   1071 		   set.
   1072 		   */
   1073 
   1074 		raidPtr->serial_number = clabel->serial_number;
   1075 
   1076 		raid_init_component_label(raidPtr, &ci_label);
   1077 		ci_label.serial_number = clabel->serial_number;
   1078 
   1079 		for(row=0;row<raidPtr->numRow;row++) {
   1080 			ci_label.row = row;
   1081 			for(column=0;column<raidPtr->numCol;column++) {
   1082 				diskPtr = &raidPtr->Disks[row][column];
   1083 				ci_label.partitionSize = diskPtr->partitionSize;
   1084 				ci_label.column = column;
   1085 				raidwrite_component_label(
   1086 				  raidPtr->Disks[row][column].dev,
   1087 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1088 				  &ci_label );
   1089 			}
   1090 		}
   1091 
   1092 		return (retcode);
   1093 	case RAIDFRAME_SET_AUTOCONFIG:
   1094 		d = rf_set_autoconfig(raidPtr, *data);
   1095 		printf("New autoconfig value is: %d\n", d);
   1096 		*data = d;
   1097 		return (retcode);
   1098 
   1099 	case RAIDFRAME_SET_ROOT:
   1100 		d = rf_set_rootpartition(raidPtr, *data);
   1101 		printf("New rootpartition value is: %d\n", d);
   1102 		*data = d;
   1103 		return (retcode);
   1104 
   1105 		/* initialize all parity */
   1106 	case RAIDFRAME_REWRITEPARITY:
   1107 
   1108 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1109 			/* Parity for RAID 0 is trivially correct */
   1110 			raidPtr->parity_good = RF_RAID_CLEAN;
   1111 			return(0);
   1112 		}
   1113 
   1114 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1115 			/* Re-write is already in progress! */
   1116 			return(EINVAL);
   1117 		}
   1118 
   1119 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1120 					   rf_RewriteParityThread,
   1121 					   raidPtr,"raid_parity");
   1122 		return (retcode);
   1123 
   1124 
   1125 	case RAIDFRAME_ADD_HOT_SPARE:
   1126 		sparePtr = (RF_SingleComponent_t *) data;
   1127 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1128 		printf("Adding spare\n");
   1129 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1130 		return(retcode);
   1131 
   1132 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1133 		return(retcode);
   1134 
   1135 	case RAIDFRAME_REBUILD_IN_PLACE:
   1136 
   1137 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1138 			/* Can't do this on a RAID 0!! */
   1139 			return(EINVAL);
   1140 		}
   1141 
   1142 		if (raidPtr->recon_in_progress == 1) {
   1143 			/* a reconstruct is already in progress! */
   1144 			return(EINVAL);
   1145 		}
   1146 
   1147 		componentPtr = (RF_SingleComponent_t *) data;
   1148 		memcpy( &component, componentPtr,
   1149 			sizeof(RF_SingleComponent_t));
   1150 		row = component.row;
   1151 		column = component.column;
   1152 		printf("Rebuild: %d %d\n",row, column);
   1153 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1154 		    (column < 0) || (column >= raidPtr->numCol)) {
   1155 			return(EINVAL);
   1156 		}
   1157 
   1158 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1159 		if (rrcopy == NULL)
   1160 			return(ENOMEM);
   1161 
   1162 		rrcopy->raidPtr = (void *) raidPtr;
   1163 		rrcopy->row = row;
   1164 		rrcopy->col = column;
   1165 
   1166 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1167 					   rf_ReconstructInPlaceThread,
   1168 					   rrcopy,"raid_reconip");
   1169 		return(retcode);
   1170 
   1171 	case RAIDFRAME_GET_INFO:
   1172 		if (!raidPtr->valid)
   1173 			return (ENODEV);
   1174 		ucfgp = (RF_DeviceConfig_t **) data;
   1175 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1176 			  (RF_DeviceConfig_t *));
   1177 		if (d_cfg == NULL)
   1178 			return (ENOMEM);
   1179 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1180 		d_cfg->rows = raidPtr->numRow;
   1181 		d_cfg->cols = raidPtr->numCol;
   1182 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1183 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1184 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1185 			return (ENOMEM);
   1186 		}
   1187 		d_cfg->nspares = raidPtr->numSpare;
   1188 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1189 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1190 			return (ENOMEM);
   1191 		}
   1192 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1193 		d = 0;
   1194 		for (i = 0; i < d_cfg->rows; i++) {
   1195 			for (j = 0; j < d_cfg->cols; j++) {
   1196 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1197 				d++;
   1198 			}
   1199 		}
   1200 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1201 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1202 		}
   1203 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1204 				  sizeof(RF_DeviceConfig_t));
   1205 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1206 
   1207 		return (retcode);
   1208 
   1209 	case RAIDFRAME_CHECK_PARITY:
   1210 		*(int *) data = raidPtr->parity_good;
   1211 		return (0);
   1212 
   1213 	case RAIDFRAME_RESET_ACCTOTALS:
   1214 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1215 		return (0);
   1216 
   1217 	case RAIDFRAME_GET_ACCTOTALS:
   1218 		totals = (RF_AccTotals_t *) data;
   1219 		*totals = raidPtr->acc_totals;
   1220 		return (0);
   1221 
   1222 	case RAIDFRAME_KEEP_ACCTOTALS:
   1223 		raidPtr->keep_acc_totals = *(int *)data;
   1224 		return (0);
   1225 
   1226 	case RAIDFRAME_GET_SIZE:
   1227 		*(int *) data = raidPtr->totalSectors;
   1228 		return (0);
   1229 
   1230 		/* fail a disk & optionally start reconstruction */
   1231 	case RAIDFRAME_FAIL_DISK:
   1232 
   1233 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1234 			/* Can't do this on a RAID 0!! */
   1235 			return(EINVAL);
   1236 		}
   1237 
   1238 		rr = (struct rf_recon_req *) data;
   1239 
   1240 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1241 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1242 			return (EINVAL);
   1243 
   1244 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1245 		       unit, rr->row, rr->col);
   1246 
   1247 		/* make a copy of the recon request so that we don't rely on
   1248 		 * the user's buffer */
   1249 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1250 		if (rrcopy == NULL)
   1251 			return(ENOMEM);
   1252 		bcopy(rr, rrcopy, sizeof(*rr));
   1253 		rrcopy->raidPtr = (void *) raidPtr;
   1254 
   1255 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1256 					   rf_ReconThread,
   1257 					   rrcopy,"raid_recon");
   1258 		return (0);
   1259 
   1260 		/* invoke a copyback operation after recon on whatever disk
   1261 		 * needs it, if any */
   1262 	case RAIDFRAME_COPYBACK:
   1263 
   1264 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1265 			/* This makes no sense on a RAID 0!! */
   1266 			return(EINVAL);
   1267 		}
   1268 
   1269 		if (raidPtr->copyback_in_progress == 1) {
   1270 			/* Copyback is already in progress! */
   1271 			return(EINVAL);
   1272 		}
   1273 
   1274 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1275 					   rf_CopybackThread,
   1276 					   raidPtr,"raid_copyback");
   1277 		return (retcode);
   1278 
   1279 		/* return the percentage completion of reconstruction */
   1280 	case RAIDFRAME_CHECK_RECON_STATUS:
   1281 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1282 			/* This makes no sense on a RAID 0 */
   1283 			return(EINVAL);
   1284 		}
   1285 		row = 0; /* XXX we only consider a single row... */
   1286 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1287 			*(int *) data = 100;
   1288 		else
   1289 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1290 		return (0);
   1291 
   1292 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1293 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1294 			/* This makes no sense on a RAID 0 */
   1295 			return(EINVAL);
   1296 		}
   1297 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1298 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1299 		} else {
   1300 			*(int *) data = 100;
   1301 		}
   1302 		return (0);
   1303 
   1304 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1305 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1306 			/* This makes no sense on a RAID 0 */
   1307 			return(EINVAL);
   1308 		}
   1309 		if (raidPtr->copyback_in_progress == 1) {
   1310 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1311 				raidPtr->Layout.numStripe;
   1312 		} else {
   1313 			*(int *) data = 100;
   1314 		}
   1315 		return (0);
   1316 
   1317 
   1318 		/* the sparetable daemon calls this to wait for the kernel to
   1319 		 * need a spare table. this ioctl does not return until a
   1320 		 * spare table is needed. XXX -- calling mpsleep here in the
   1321 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1322 		 * -- I should either compute the spare table in the kernel,
   1323 		 * or have a different -- XXX XXX -- interface (a different
   1324 		 * character device) for delivering the table     -- XXX */
   1325 #if 0
   1326 	case RAIDFRAME_SPARET_WAIT:
   1327 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1328 		while (!rf_sparet_wait_queue)
   1329 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1330 		waitreq = rf_sparet_wait_queue;
   1331 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1332 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1333 
   1334 		/* structure assignment */
   1335 		*((RF_SparetWait_t *) data) = *waitreq;
   1336 
   1337 		RF_Free(waitreq, sizeof(*waitreq));
   1338 		return (0);
   1339 
   1340 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1341 		 * code in it that will cause the daemon to exit */
   1342 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1343 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1344 		waitreq->fcol = -1;
   1345 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1346 		waitreq->next = rf_sparet_wait_queue;
   1347 		rf_sparet_wait_queue = waitreq;
   1348 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1349 		wakeup(&rf_sparet_wait_queue);
   1350 		return (0);
   1351 
   1352 		/* used by the spare table daemon to deliver a spare table
   1353 		 * into the kernel */
   1354 	case RAIDFRAME_SEND_SPARET:
   1355 
   1356 		/* install the spare table */
   1357 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1358 
   1359 		/* respond to the requestor.  the return status of the spare
   1360 		 * table installation is passed in the "fcol" field */
   1361 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1362 		waitreq->fcol = retcode;
   1363 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1364 		waitreq->next = rf_sparet_resp_queue;
   1365 		rf_sparet_resp_queue = waitreq;
   1366 		wakeup(&rf_sparet_resp_queue);
   1367 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1368 
   1369 		return (retcode);
   1370 #endif
   1371 
   1372 	default:
   1373 		break; /* fall through to the os-specific code below */
   1374 
   1375 	}
   1376 
   1377 	if (!raidPtr->valid)
   1378 		return (EINVAL);
   1379 
   1380 	/*
   1381 	 * Add support for "regular" device ioctls here.
   1382 	 */
   1383 
   1384 	switch (cmd) {
   1385 	case DIOCGDINFO:
   1386 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1387 		break;
   1388 
   1389 	case DIOCGPART:
   1390 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1391 		((struct partinfo *) data)->part =
   1392 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1393 		break;
   1394 
   1395 	case DIOCWDINFO:
   1396 	case DIOCSDINFO:
   1397 		if ((error = raidlock(rs)) != 0)
   1398 			return (error);
   1399 
   1400 		rs->sc_flags |= RAIDF_LABELLING;
   1401 
   1402 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1403 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1404 		if (error == 0) {
   1405 			if (cmd == DIOCWDINFO)
   1406 				error = writedisklabel(RAIDLABELDEV(dev),
   1407 				    raidstrategy, rs->sc_dkdev.dk_label,
   1408 				    rs->sc_dkdev.dk_cpulabel);
   1409 		}
   1410 		rs->sc_flags &= ~RAIDF_LABELLING;
   1411 
   1412 		raidunlock(rs);
   1413 
   1414 		if (error)
   1415 			return (error);
   1416 		break;
   1417 
   1418 	case DIOCWLABEL:
   1419 		if (*(int *) data != 0)
   1420 			rs->sc_flags |= RAIDF_WLABEL;
   1421 		else
   1422 			rs->sc_flags &= ~RAIDF_WLABEL;
   1423 		break;
   1424 
   1425 	case DIOCGDEFLABEL:
   1426 		raidgetdefaultlabel(raidPtr, rs,
   1427 		    (struct disklabel *) data);
   1428 		break;
   1429 
   1430 	default:
   1431 		retcode = ENOTTY;
   1432 	}
   1433 	return (retcode);
   1434 
   1435 }
   1436 
   1437 
   1438 /* raidinit -- complete the rest of the initialization for the
   1439    RAIDframe device.  */
   1440 
   1441 
   1442 static int
   1443 raidinit(dev, raidPtr, unit)
   1444 	dev_t   dev;
   1445 	RF_Raid_t *raidPtr;
   1446 	int     unit;
   1447 {
   1448 	int     retcode;
   1449 	struct raid_softc *rs;
   1450 
   1451 	retcode = 0;
   1452 
   1453 	rs = &raid_softc[unit];
   1454 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1455 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1456 
   1457 
   1458 	/* XXX should check return code first... */
   1459 	rs->sc_flags |= RAIDF_INITED;
   1460 
   1461 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1462 
   1463 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1464 
   1465 	/* disk_attach actually creates space for the CPU disklabel, among
   1466 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1467 	 * with disklabels. */
   1468 
   1469 	disk_attach(&rs->sc_dkdev);
   1470 
   1471 	/* XXX There may be a weird interaction here between this, and
   1472 	 * protectedSectors, as used in RAIDframe.  */
   1473 
   1474 	rs->sc_size = raidPtr->totalSectors;
   1475 	rs->sc_dev = dev;
   1476 
   1477 	return (retcode);
   1478 }
   1479 
   1480 /* wake up the daemon & tell it to get us a spare table
   1481  * XXX
   1482  * the entries in the queues should be tagged with the raidPtr
   1483  * so that in the extremely rare case that two recons happen at once,
   1484  * we know for which device were requesting a spare table
   1485  * XXX
   1486  *
   1487  * XXX This code is not currently used. GO
   1488  */
   1489 int
   1490 rf_GetSpareTableFromDaemon(req)
   1491 	RF_SparetWait_t *req;
   1492 {
   1493 	int     retcode;
   1494 
   1495 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1496 	req->next = rf_sparet_wait_queue;
   1497 	rf_sparet_wait_queue = req;
   1498 	wakeup(&rf_sparet_wait_queue);
   1499 
   1500 	/* mpsleep unlocks the mutex */
   1501 	while (!rf_sparet_resp_queue) {
   1502 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1503 		    "raidframe getsparetable", 0);
   1504 	}
   1505 	req = rf_sparet_resp_queue;
   1506 	rf_sparet_resp_queue = req->next;
   1507 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1508 
   1509 	retcode = req->fcol;
   1510 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1511 					 * alloc'd */
   1512 	return (retcode);
   1513 }
   1514 
   1515 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1516  * bp & passes it down.
   1517  * any calls originating in the kernel must use non-blocking I/O
   1518  * do some extra sanity checking to return "appropriate" error values for
   1519  * certain conditions (to make some standard utilities work)
   1520  *
   1521  * Formerly known as: rf_DoAccessKernel
   1522  */
   1523 void
   1524 raidstart(raidPtr)
   1525 	RF_Raid_t *raidPtr;
   1526 {
   1527 	RF_SectorCount_t num_blocks, pb, sum;
   1528 	RF_RaidAddr_t raid_addr;
   1529 	int     retcode;
   1530 	struct partition *pp;
   1531 	daddr_t blocknum;
   1532 	int     unit;
   1533 	struct raid_softc *rs;
   1534 	int     do_async;
   1535 	struct buf *bp;
   1536 
   1537 	unit = raidPtr->raidid;
   1538 	rs = &raid_softc[unit];
   1539 
   1540 	/* Check to see if we're at the limit... */
   1541 	RF_LOCK_MUTEX(raidPtr->mutex);
   1542 	while (raidPtr->openings > 0) {
   1543 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1544 
   1545 		/* get the next item, if any, from the queue */
   1546 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
   1547 			/* nothing more to do */
   1548 			return;
   1549 		}
   1550 		BUFQ_REMOVE(&rs->buf_queue, bp);
   1551 
   1552 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1553 		 * partition.. Need to make it absolute to the underlying
   1554 		 * device.. */
   1555 
   1556 		blocknum = bp->b_blkno;
   1557 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1558 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1559 			blocknum += pp->p_offset;
   1560 		}
   1561 
   1562 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1563 			    (int) blocknum));
   1564 
   1565 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1566 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1567 
   1568 		/* *THIS* is where we adjust what block we're going to...
   1569 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1570 		raid_addr = blocknum;
   1571 
   1572 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1573 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1574 		sum = raid_addr + num_blocks + pb;
   1575 		if (1 || rf_debugKernelAccess) {
   1576 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1577 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1578 				    (int) pb, (int) bp->b_resid));
   1579 		}
   1580 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1581 		    || (sum < num_blocks) || (sum < pb)) {
   1582 			bp->b_error = ENOSPC;
   1583 			bp->b_flags |= B_ERROR;
   1584 			bp->b_resid = bp->b_bcount;
   1585 			biodone(bp);
   1586 			RF_LOCK_MUTEX(raidPtr->mutex);
   1587 			continue;
   1588 		}
   1589 		/*
   1590 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1591 		 */
   1592 
   1593 		if (bp->b_bcount & raidPtr->sectorMask) {
   1594 			bp->b_error = EINVAL;
   1595 			bp->b_flags |= B_ERROR;
   1596 			bp->b_resid = bp->b_bcount;
   1597 			biodone(bp);
   1598 			RF_LOCK_MUTEX(raidPtr->mutex);
   1599 			continue;
   1600 
   1601 		}
   1602 		db1_printf(("Calling DoAccess..\n"));
   1603 
   1604 
   1605 		RF_LOCK_MUTEX(raidPtr->mutex);
   1606 		raidPtr->openings--;
   1607 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1608 
   1609 		/*
   1610 		 * Everything is async.
   1611 		 */
   1612 		do_async = 1;
   1613 
   1614 		/* don't ever condition on bp->b_flags & B_WRITE.
   1615 		 * always condition on B_READ instead */
   1616 
   1617 		/* XXX we're still at splbio() here... do we *really*
   1618 		   need to be? */
   1619 
   1620 
   1621 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1622 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1623 				      do_async, raid_addr, num_blocks,
   1624 				      bp->b_un.b_addr, bp, NULL, NULL,
   1625 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
   1626 
   1627 
   1628 		RF_LOCK_MUTEX(raidPtr->mutex);
   1629 	}
   1630 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1631 }
   1632 
   1633 
   1634 
   1635 
   1636 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1637 
   1638 int
   1639 rf_DispatchKernelIO(queue, req)
   1640 	RF_DiskQueue_t *queue;
   1641 	RF_DiskQueueData_t *req;
   1642 {
   1643 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1644 	struct buf *bp;
   1645 	struct raidbuf *raidbp = NULL;
   1646 	struct raid_softc *rs;
   1647 	int     unit;
   1648 	int s;
   1649 
   1650 	s=0;
   1651 	/* s = splbio();*/ /* want to test this */
   1652 	/* XXX along with the vnode, we also need the softc associated with
   1653 	 * this device.. */
   1654 
   1655 	req->queue = queue;
   1656 
   1657 	unit = queue->raidPtr->raidid;
   1658 
   1659 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1660 
   1661 	if (unit >= numraid) {
   1662 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1663 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1664 	}
   1665 	rs = &raid_softc[unit];
   1666 
   1667 	/* XXX is this the right place? */
   1668 	disk_busy(&rs->sc_dkdev);
   1669 
   1670 	bp = req->bp;
   1671 #if 1
   1672 	/* XXX when there is a physical disk failure, someone is passing us a
   1673 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1674 	 * without taking a performance hit... (not sure where the real bug
   1675 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1676 
   1677 	if (bp->b_flags & B_ERROR) {
   1678 		bp->b_flags &= ~B_ERROR;
   1679 	}
   1680 	if (bp->b_error != 0) {
   1681 		bp->b_error = 0;
   1682 	}
   1683 #endif
   1684 	raidbp = RAIDGETBUF(rs);
   1685 
   1686 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1687 
   1688 	/*
   1689 	 * context for raidiodone
   1690 	 */
   1691 	raidbp->rf_obp = bp;
   1692 	raidbp->req = req;
   1693 
   1694 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1695 
   1696 	switch (req->type) {
   1697 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1698 		/* XXX need to do something extra here.. */
   1699 		/* I'm leaving this in, as I've never actually seen it used,
   1700 		 * and I'd like folks to report it... GO */
   1701 		printf(("WAKEUP CALLED\n"));
   1702 		queue->numOutstanding++;
   1703 
   1704 		/* XXX need to glue the original buffer into this??  */
   1705 
   1706 		KernelWakeupFunc(&raidbp->rf_buf);
   1707 		break;
   1708 
   1709 	case RF_IO_TYPE_READ:
   1710 	case RF_IO_TYPE_WRITE:
   1711 
   1712 		if (req->tracerec) {
   1713 			RF_ETIMER_START(req->tracerec->timer);
   1714 		}
   1715 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1716 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1717 		    req->sectorOffset, req->numSector,
   1718 		    req->buf, KernelWakeupFunc, (void *) req,
   1719 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1720 
   1721 		if (rf_debugKernelAccess) {
   1722 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1723 				(long) bp->b_blkno));
   1724 		}
   1725 		queue->numOutstanding++;
   1726 		queue->last_deq_sector = req->sectorOffset;
   1727 		/* acc wouldn't have been let in if there were any pending
   1728 		 * reqs at any other priority */
   1729 		queue->curPriority = req->priority;
   1730 
   1731 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1732 			req->type, unit, queue->row, queue->col));
   1733 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1734 			(int) req->sectorOffset, (int) req->numSector,
   1735 			(int) (req->numSector <<
   1736 			    queue->raidPtr->logBytesPerSector),
   1737 			(int) queue->raidPtr->logBytesPerSector));
   1738 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1739 			raidbp->rf_buf.b_vp->v_numoutput++;
   1740 		}
   1741 		VOP_STRATEGY(&raidbp->rf_buf);
   1742 
   1743 		break;
   1744 
   1745 	default:
   1746 		panic("bad req->type in rf_DispatchKernelIO");
   1747 	}
   1748 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1749 	/* splx(s); */ /* want to test this */
   1750 	return (0);
   1751 }
   1752 /* this is the callback function associated with a I/O invoked from
   1753    kernel code.
   1754  */
   1755 static void
   1756 KernelWakeupFunc(vbp)
   1757 	struct buf *vbp;
   1758 {
   1759 	RF_DiskQueueData_t *req = NULL;
   1760 	RF_DiskQueue_t *queue;
   1761 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1762 	struct buf *bp;
   1763 	struct raid_softc *rs;
   1764 	int     unit;
   1765 	register int s;
   1766 
   1767 	s = splbio();
   1768 	db1_printf(("recovering the request queue:\n"));
   1769 	req = raidbp->req;
   1770 
   1771 	bp = raidbp->rf_obp;
   1772 
   1773 	queue = (RF_DiskQueue_t *) req->queue;
   1774 
   1775 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1776 		bp->b_flags |= B_ERROR;
   1777 		bp->b_error = raidbp->rf_buf.b_error ?
   1778 		    raidbp->rf_buf.b_error : EIO;
   1779 	}
   1780 
   1781 	/* XXX methinks this could be wrong... */
   1782 #if 1
   1783 	bp->b_resid = raidbp->rf_buf.b_resid;
   1784 #endif
   1785 
   1786 	if (req->tracerec) {
   1787 		RF_ETIMER_STOP(req->tracerec->timer);
   1788 		RF_ETIMER_EVAL(req->tracerec->timer);
   1789 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1790 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1791 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1792 		req->tracerec->num_phys_ios++;
   1793 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1794 	}
   1795 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1796 
   1797 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1798 
   1799 
   1800 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1801 	 * ballistic, and mark the component as hosed... */
   1802 
   1803 	if (bp->b_flags & B_ERROR) {
   1804 		/* Mark the disk as dead */
   1805 		/* but only mark it once... */
   1806 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1807 		    rf_ds_optimal) {
   1808 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1809 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1810 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1811 			    rf_ds_failed;
   1812 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1813 			queue->raidPtr->numFailures++;
   1814 			/* XXX here we should bump the version number for each component, and write that data out */
   1815 		} else {	/* Disk is already dead... */
   1816 			/* printf("Disk already marked as dead!\n"); */
   1817 		}
   1818 
   1819 	}
   1820 
   1821 	rs = &raid_softc[unit];
   1822 	RAIDPUTBUF(rs, raidbp);
   1823 
   1824 
   1825 	if (bp->b_resid == 0) {
   1826 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1827 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1828 	}
   1829 
   1830 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1831 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1832 
   1833 	splx(s);
   1834 }
   1835 
   1836 
   1837 
   1838 /*
   1839  * initialize a buf structure for doing an I/O in the kernel.
   1840  */
   1841 static void
   1842 InitBP(
   1843     struct buf * bp,
   1844     struct vnode * b_vp,
   1845     unsigned rw_flag,
   1846     dev_t dev,
   1847     RF_SectorNum_t startSect,
   1848     RF_SectorCount_t numSect,
   1849     caddr_t buf,
   1850     void (*cbFunc) (struct buf *),
   1851     void *cbArg,
   1852     int logBytesPerSector,
   1853     struct proc * b_proc)
   1854 {
   1855 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1856 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1857 	bp->b_bcount = numSect << logBytesPerSector;
   1858 	bp->b_bufsize = bp->b_bcount;
   1859 	bp->b_error = 0;
   1860 	bp->b_dev = dev;
   1861 	bp->b_un.b_addr = buf;
   1862 	bp->b_blkno = startSect;
   1863 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1864 	if (bp->b_bcount == 0) {
   1865 		panic("bp->b_bcount is zero in InitBP!!\n");
   1866 	}
   1867 	bp->b_proc = b_proc;
   1868 	bp->b_iodone = cbFunc;
   1869 	bp->b_vp = b_vp;
   1870 
   1871 }
   1872 
   1873 static void
   1874 raidgetdefaultlabel(raidPtr, rs, lp)
   1875 	RF_Raid_t *raidPtr;
   1876 	struct raid_softc *rs;
   1877 	struct disklabel *lp;
   1878 {
   1879 	db1_printf(("Building a default label...\n"));
   1880 	bzero(lp, sizeof(*lp));
   1881 
   1882 	/* fabricate a label... */
   1883 	lp->d_secperunit = raidPtr->totalSectors;
   1884 	lp->d_secsize = raidPtr->bytesPerSector;
   1885 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1886 	lp->d_ntracks = 1;
   1887 	lp->d_ncylinders = raidPtr->totalSectors /
   1888 		(lp->d_nsectors * lp->d_ntracks);
   1889 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1890 
   1891 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1892 	lp->d_type = DTYPE_RAID;
   1893 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1894 	lp->d_rpm = 3600;
   1895 	lp->d_interleave = 1;
   1896 	lp->d_flags = 0;
   1897 
   1898 	lp->d_partitions[RAW_PART].p_offset = 0;
   1899 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1900 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1901 	lp->d_npartitions = RAW_PART + 1;
   1902 
   1903 	lp->d_magic = DISKMAGIC;
   1904 	lp->d_magic2 = DISKMAGIC;
   1905 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1906 
   1907 }
   1908 /*
   1909  * Read the disklabel from the raid device.  If one is not present, fake one
   1910  * up.
   1911  */
   1912 static void
   1913 raidgetdisklabel(dev)
   1914 	dev_t   dev;
   1915 {
   1916 	int     unit = raidunit(dev);
   1917 	struct raid_softc *rs = &raid_softc[unit];
   1918 	char   *errstring;
   1919 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1920 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1921 	RF_Raid_t *raidPtr;
   1922 
   1923 	db1_printf(("Getting the disklabel...\n"));
   1924 
   1925 	bzero(clp, sizeof(*clp));
   1926 
   1927 	raidPtr = raidPtrs[unit];
   1928 
   1929 	raidgetdefaultlabel(raidPtr, rs, lp);
   1930 
   1931 	/*
   1932 	 * Call the generic disklabel extraction routine.
   1933 	 */
   1934 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1935 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1936 	if (errstring)
   1937 		raidmakedisklabel(rs);
   1938 	else {
   1939 		int     i;
   1940 		struct partition *pp;
   1941 
   1942 		/*
   1943 		 * Sanity check whether the found disklabel is valid.
   1944 		 *
   1945 		 * This is necessary since total size of the raid device
   1946 		 * may vary when an interleave is changed even though exactly
   1947 		 * same componets are used, and old disklabel may used
   1948 		 * if that is found.
   1949 		 */
   1950 		if (lp->d_secperunit != rs->sc_size)
   1951 			printf("WARNING: %s: "
   1952 			    "total sector size in disklabel (%d) != "
   1953 			    "the size of raid (%ld)\n", rs->sc_xname,
   1954 			    lp->d_secperunit, (long) rs->sc_size);
   1955 		for (i = 0; i < lp->d_npartitions; i++) {
   1956 			pp = &lp->d_partitions[i];
   1957 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1958 				printf("WARNING: %s: end of partition `%c' "
   1959 				    "exceeds the size of raid (%ld)\n",
   1960 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1961 		}
   1962 	}
   1963 
   1964 }
   1965 /*
   1966  * Take care of things one might want to take care of in the event
   1967  * that a disklabel isn't present.
   1968  */
   1969 static void
   1970 raidmakedisklabel(rs)
   1971 	struct raid_softc *rs;
   1972 {
   1973 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1974 	db1_printf(("Making a label..\n"));
   1975 
   1976 	/*
   1977 	 * For historical reasons, if there's no disklabel present
   1978 	 * the raw partition must be marked FS_BSDFFS.
   1979 	 */
   1980 
   1981 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1982 
   1983 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1984 
   1985 	lp->d_checksum = dkcksum(lp);
   1986 }
   1987 /*
   1988  * Lookup the provided name in the filesystem.  If the file exists,
   1989  * is a valid block device, and isn't being used by anyone else,
   1990  * set *vpp to the file's vnode.
   1991  * You'll find the original of this in ccd.c
   1992  */
   1993 int
   1994 raidlookup(path, p, vpp)
   1995 	char   *path;
   1996 	struct proc *p;
   1997 	struct vnode **vpp;	/* result */
   1998 {
   1999 	struct nameidata nd;
   2000 	struct vnode *vp;
   2001 	struct vattr va;
   2002 	int     error;
   2003 
   2004 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2005 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2006 #ifdef DEBUG
   2007 		printf("RAIDframe: vn_open returned %d\n", error);
   2008 #endif
   2009 		return (error);
   2010 	}
   2011 	vp = nd.ni_vp;
   2012 	if (vp->v_usecount > 1) {
   2013 		VOP_UNLOCK(vp, 0);
   2014 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2015 		return (EBUSY);
   2016 	}
   2017 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2018 		VOP_UNLOCK(vp, 0);
   2019 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2020 		return (error);
   2021 	}
   2022 	/* XXX: eventually we should handle VREG, too. */
   2023 	if (va.va_type != VBLK) {
   2024 		VOP_UNLOCK(vp, 0);
   2025 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2026 		return (ENOTBLK);
   2027 	}
   2028 	VOP_UNLOCK(vp, 0);
   2029 	*vpp = vp;
   2030 	return (0);
   2031 }
   2032 /*
   2033  * Wait interruptibly for an exclusive lock.
   2034  *
   2035  * XXX
   2036  * Several drivers do this; it should be abstracted and made MP-safe.
   2037  * (Hmm... where have we seen this warning before :->  GO )
   2038  */
   2039 static int
   2040 raidlock(rs)
   2041 	struct raid_softc *rs;
   2042 {
   2043 	int     error;
   2044 
   2045 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2046 		rs->sc_flags |= RAIDF_WANTED;
   2047 		if ((error =
   2048 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2049 			return (error);
   2050 	}
   2051 	rs->sc_flags |= RAIDF_LOCKED;
   2052 	return (0);
   2053 }
   2054 /*
   2055  * Unlock and wake up any waiters.
   2056  */
   2057 static void
   2058 raidunlock(rs)
   2059 	struct raid_softc *rs;
   2060 {
   2061 
   2062 	rs->sc_flags &= ~RAIDF_LOCKED;
   2063 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2064 		rs->sc_flags &= ~RAIDF_WANTED;
   2065 		wakeup(rs);
   2066 	}
   2067 }
   2068 
   2069 
   2070 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2071 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2072 
   2073 int
   2074 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2075 {
   2076 	RF_ComponentLabel_t clabel;
   2077 	raidread_component_label(dev, b_vp, &clabel);
   2078 	clabel.mod_counter = mod_counter;
   2079 	clabel.clean = RF_RAID_CLEAN;
   2080 	raidwrite_component_label(dev, b_vp, &clabel);
   2081 	return(0);
   2082 }
   2083 
   2084 
   2085 int
   2086 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2087 {
   2088 	RF_ComponentLabel_t clabel;
   2089 	raidread_component_label(dev, b_vp, &clabel);
   2090 	clabel.mod_counter = mod_counter;
   2091 	clabel.clean = RF_RAID_DIRTY;
   2092 	raidwrite_component_label(dev, b_vp, &clabel);
   2093 	return(0);
   2094 }
   2095 
   2096 /* ARGSUSED */
   2097 int
   2098 raidread_component_label(dev, b_vp, clabel)
   2099 	dev_t dev;
   2100 	struct vnode *b_vp;
   2101 	RF_ComponentLabel_t *clabel;
   2102 {
   2103 	struct buf *bp;
   2104 	int error;
   2105 
   2106 	/* XXX should probably ensure that we don't try to do this if
   2107 	   someone has changed rf_protected_sectors. */
   2108 
   2109 	/* get a block of the appropriate size... */
   2110 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2111 	bp->b_dev = dev;
   2112 
   2113 	/* get our ducks in a row for the read */
   2114 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2115 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2116 	bp->b_flags = B_BUSY | B_READ;
   2117  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2118 
   2119 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2120 
   2121 	error = biowait(bp);
   2122 
   2123 	if (!error) {
   2124 		memcpy(clabel, bp->b_un.b_addr,
   2125 		       sizeof(RF_ComponentLabel_t));
   2126 #if 0
   2127 		print_component_label( clabel );
   2128 #endif
   2129         } else {
   2130 #if 0
   2131 		printf("Failed to read RAID component label!\n");
   2132 #endif
   2133 	}
   2134 
   2135         bp->b_flags = B_INVAL | B_AGE;
   2136 	brelse(bp);
   2137 	return(error);
   2138 }
   2139 /* ARGSUSED */
   2140 int
   2141 raidwrite_component_label(dev, b_vp, clabel)
   2142 	dev_t dev;
   2143 	struct vnode *b_vp;
   2144 	RF_ComponentLabel_t *clabel;
   2145 {
   2146 	struct buf *bp;
   2147 	int error;
   2148 
   2149 	/* get a block of the appropriate size... */
   2150 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2151 	bp->b_dev = dev;
   2152 
   2153 	/* get our ducks in a row for the write */
   2154 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2155 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2156 	bp->b_flags = B_BUSY | B_WRITE;
   2157  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2158 
   2159 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2160 
   2161 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2162 
   2163 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2164 	error = biowait(bp);
   2165         bp->b_flags = B_INVAL | B_AGE;
   2166 	brelse(bp);
   2167 	if (error) {
   2168 #if 1
   2169 		printf("Failed to write RAID component info!\n");
   2170 #endif
   2171 	}
   2172 
   2173 	return(error);
   2174 }
   2175 
   2176 void
   2177 rf_markalldirty( raidPtr )
   2178 	RF_Raid_t *raidPtr;
   2179 {
   2180 	RF_ComponentLabel_t clabel;
   2181 	int r,c;
   2182 
   2183 	raidPtr->mod_counter++;
   2184 	for (r = 0; r < raidPtr->numRow; r++) {
   2185 		for (c = 0; c < raidPtr->numCol; c++) {
   2186 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2187 				raidread_component_label(
   2188 					raidPtr->Disks[r][c].dev,
   2189 					raidPtr->raid_cinfo[r][c].ci_vp,
   2190 					&clabel);
   2191 				if (clabel.status == rf_ds_spared) {
   2192 					/* XXX do something special...
   2193 					 but whatever you do, don't
   2194 					 try to access it!! */
   2195 				} else {
   2196 #if 0
   2197 				clabel.status =
   2198 					raidPtr->Disks[r][c].status;
   2199 				raidwrite_component_label(
   2200 					raidPtr->Disks[r][c].dev,
   2201 					raidPtr->raid_cinfo[r][c].ci_vp,
   2202 					&clabel);
   2203 #endif
   2204 				raidmarkdirty(
   2205 				       raidPtr->Disks[r][c].dev,
   2206 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2207 				       raidPtr->mod_counter);
   2208 				}
   2209 			}
   2210 		}
   2211 	}
   2212 	/* printf("Component labels marked dirty.\n"); */
   2213 #if 0
   2214 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2215 		sparecol = raidPtr->numCol + c;
   2216 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2217 			/*
   2218 
   2219 			   XXX this is where we get fancy and map this spare
   2220 			   into it's correct spot in the array.
   2221 
   2222 			 */
   2223 			/*
   2224 
   2225 			   we claim this disk is "optimal" if it's
   2226 			   rf_ds_used_spare, as that means it should be
   2227 			   directly substitutable for the disk it replaced.
   2228 			   We note that too...
   2229 
   2230 			 */
   2231 
   2232 			for(i=0;i<raidPtr->numRow;i++) {
   2233 				for(j=0;j<raidPtr->numCol;j++) {
   2234 					if ((raidPtr->Disks[i][j].spareRow ==
   2235 					     r) &&
   2236 					    (raidPtr->Disks[i][j].spareCol ==
   2237 					     sparecol)) {
   2238 						srow = r;
   2239 						scol = sparecol;
   2240 						break;
   2241 					}
   2242 				}
   2243 			}
   2244 
   2245 			raidread_component_label(
   2246 				      raidPtr->Disks[r][sparecol].dev,
   2247 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2248 				      &clabel);
   2249 			/* make sure status is noted */
   2250 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2251 			clabel.mod_counter = raidPtr->mod_counter;
   2252 			clabel.serial_number = raidPtr->serial_number;
   2253 			clabel.row = srow;
   2254 			clabel.column = scol;
   2255 			clabel.num_rows = raidPtr->numRow;
   2256 			clabel.num_columns = raidPtr->numCol;
   2257 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2258 			clabel.status = rf_ds_optimal;
   2259 			raidwrite_component_label(
   2260 				      raidPtr->Disks[r][sparecol].dev,
   2261 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2262 				      &clabel);
   2263 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2264 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2265 		}
   2266 	}
   2267 
   2268 #endif
   2269 }
   2270 
   2271 
   2272 void
   2273 rf_update_component_labels( raidPtr )
   2274 	RF_Raid_t *raidPtr;
   2275 {
   2276 	RF_ComponentLabel_t clabel;
   2277 	int sparecol;
   2278 	int r,c;
   2279 	int i,j;
   2280 	int srow, scol;
   2281 
   2282 	srow = -1;
   2283 	scol = -1;
   2284 
   2285 	/* XXX should do extra checks to make sure things really are clean,
   2286 	   rather than blindly setting the clean bit... */
   2287 
   2288 	raidPtr->mod_counter++;
   2289 
   2290 	for (r = 0; r < raidPtr->numRow; r++) {
   2291 		for (c = 0; c < raidPtr->numCol; c++) {
   2292 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2293 				raidread_component_label(
   2294 					raidPtr->Disks[r][c].dev,
   2295 					raidPtr->raid_cinfo[r][c].ci_vp,
   2296 					&clabel);
   2297 				/* make sure status is noted */
   2298 				clabel.status = rf_ds_optimal;
   2299 				raidwrite_component_label(
   2300 					raidPtr->Disks[r][c].dev,
   2301 					raidPtr->raid_cinfo[r][c].ci_vp,
   2302 					&clabel);
   2303 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2304 					raidmarkclean(
   2305 					      raidPtr->Disks[r][c].dev,
   2306 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2307 					      raidPtr->mod_counter);
   2308 				}
   2309 			}
   2310 			/* else we don't touch it.. */
   2311 #if 0
   2312 			else if (raidPtr->Disks[r][c].status !=
   2313 				   rf_ds_failed) {
   2314 				raidread_component_label(
   2315 					raidPtr->Disks[r][c].dev,
   2316 					raidPtr->raid_cinfo[r][c].ci_vp,
   2317 					&clabel);
   2318 				/* make sure status is noted */
   2319 				clabel.status =
   2320 					raidPtr->Disks[r][c].status;
   2321 				raidwrite_component_label(
   2322 					raidPtr->Disks[r][c].dev,
   2323 					raidPtr->raid_cinfo[r][c].ci_vp,
   2324 					&clabel);
   2325 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2326 					raidmarkclean(
   2327 					      raidPtr->Disks[r][c].dev,
   2328 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2329 					      raidPtr->mod_counter);
   2330 				}
   2331 			}
   2332 #endif
   2333 		}
   2334 	}
   2335 
   2336 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2337 		sparecol = raidPtr->numCol + c;
   2338 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2339 			/*
   2340 
   2341 			   we claim this disk is "optimal" if it's
   2342 			   rf_ds_used_spare, as that means it should be
   2343 			   directly substitutable for the disk it replaced.
   2344 			   We note that too...
   2345 
   2346 			 */
   2347 
   2348 			for(i=0;i<raidPtr->numRow;i++) {
   2349 				for(j=0;j<raidPtr->numCol;j++) {
   2350 					if ((raidPtr->Disks[i][j].spareRow ==
   2351 					     0) &&
   2352 					    (raidPtr->Disks[i][j].spareCol ==
   2353 					     sparecol)) {
   2354 						srow = i;
   2355 						scol = j;
   2356 						break;
   2357 					}
   2358 				}
   2359 			}
   2360 
   2361 			raidread_component_label(
   2362 				      raidPtr->Disks[0][sparecol].dev,
   2363 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2364 				      &clabel);
   2365 			/* make sure status is noted */
   2366 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2367 			clabel.mod_counter = raidPtr->mod_counter;
   2368 			clabel.serial_number = raidPtr->serial_number;
   2369 			clabel.row = srow;
   2370 			clabel.column = scol;
   2371 			clabel.num_rows = raidPtr->numRow;
   2372 			clabel.num_columns = raidPtr->numCol;
   2373 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2374 			clabel.status = rf_ds_optimal;
   2375 			raidwrite_component_label(
   2376 				      raidPtr->Disks[0][sparecol].dev,
   2377 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2378 				      &clabel);
   2379 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2380 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2381 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2382 					       raidPtr->mod_counter);
   2383 			}
   2384 		}
   2385 	}
   2386 	/* 	printf("Component labels updated\n"); */
   2387 }
   2388 
   2389 void
   2390 rf_ReconThread(req)
   2391 	struct rf_recon_req *req;
   2392 {
   2393 	int     s;
   2394 	RF_Raid_t *raidPtr;
   2395 
   2396 	s = splbio();
   2397 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2398 	raidPtr->recon_in_progress = 1;
   2399 
   2400 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2401 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2402 
   2403 	/* XXX get rid of this! we don't need it at all.. */
   2404 	RF_Free(req, sizeof(*req));
   2405 
   2406 	raidPtr->recon_in_progress = 0;
   2407 	splx(s);
   2408 
   2409 	/* That's all... */
   2410 	kthread_exit(0);        /* does not return */
   2411 }
   2412 
   2413 void
   2414 rf_RewriteParityThread(raidPtr)
   2415 	RF_Raid_t *raidPtr;
   2416 {
   2417 	int retcode;
   2418 	int s;
   2419 
   2420 	raidPtr->parity_rewrite_in_progress = 1;
   2421 	s = splbio();
   2422 	retcode = rf_RewriteParity(raidPtr);
   2423 	splx(s);
   2424 	if (retcode) {
   2425 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2426 	} else {
   2427 		/* set the clean bit!  If we shutdown correctly,
   2428 		   the clean bit on each component label will get
   2429 		   set */
   2430 		raidPtr->parity_good = RF_RAID_CLEAN;
   2431 	}
   2432 	raidPtr->parity_rewrite_in_progress = 0;
   2433 
   2434 	/* That's all... */
   2435 	kthread_exit(0);        /* does not return */
   2436 }
   2437 
   2438 
   2439 void
   2440 rf_CopybackThread(raidPtr)
   2441 	RF_Raid_t *raidPtr;
   2442 {
   2443 	int s;
   2444 
   2445 	raidPtr->copyback_in_progress = 1;
   2446 	s = splbio();
   2447 	rf_CopybackReconstructedData(raidPtr);
   2448 	splx(s);
   2449 	raidPtr->copyback_in_progress = 0;
   2450 
   2451 	/* That's all... */
   2452 	kthread_exit(0);        /* does not return */
   2453 }
   2454 
   2455 
   2456 void
   2457 rf_ReconstructInPlaceThread(req)
   2458 	struct rf_recon_req *req;
   2459 {
   2460 	int retcode;
   2461 	int s;
   2462 	RF_Raid_t *raidPtr;
   2463 
   2464 	s = splbio();
   2465 	raidPtr = req->raidPtr;
   2466 	raidPtr->recon_in_progress = 1;
   2467 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2468 	RF_Free(req, sizeof(*req));
   2469 	raidPtr->recon_in_progress = 0;
   2470 	splx(s);
   2471 
   2472 	/* That's all... */
   2473 	kthread_exit(0);        /* does not return */
   2474 }
   2475 
   2476 void
   2477 rf_mountroot_hook(dev)
   2478 	struct device *dev;
   2479 {
   2480 #if 1
   2481 	printf("rf_mountroot_hook called for %s\n",dev->dv_xname);
   2482 #endif
   2483 	if (boothowto & RB_ASKNAME) {
   2484 		/* We don't auto-config... */
   2485 	} else {
   2486 		/* They didn't ask, and we found something bootable... */
   2487 		/* XXX pretend for now.. */
   2488 if (raidautoconfig) {
   2489 		rootspec = raid_rooty;
   2490 }
   2491 	}
   2492 }
   2493 
   2494 
   2495 RF_AutoConfig_t *
   2496 rf_find_raid_components()
   2497 {
   2498 	struct devnametobdevmaj *dtobdm;
   2499 	struct vnode *vp;
   2500 	struct disklabel label;
   2501 	struct device *dv;
   2502 	char *cd_name;
   2503 	dev_t dev;
   2504 	int error;
   2505 	int i;
   2506 	int good_one;
   2507 	RF_ComponentLabel_t *clabel;
   2508 	RF_AutoConfig_t *ac_list;
   2509 	RF_AutoConfig_t *ac;
   2510 
   2511 
   2512 	/* initialize the AutoConfig list */
   2513 	ac_list = NULL;
   2514 
   2515 if (raidautoconfig) {
   2516 
   2517 	/* we begin by trolling through *all* the devices on the system */
   2518 
   2519 	for (dv = alldevs.tqh_first; dv != NULL;
   2520 	     dv = dv->dv_list.tqe_next) {
   2521 
   2522 		/* we are only interested in disks... */
   2523 		if (dv->dv_class != DV_DISK)
   2524 			continue;
   2525 
   2526 		/* we don't care about floppies... */
   2527 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2528 			continue;
   2529 		}
   2530 
   2531 		/* need to find the device_name_to_block_device_major stuff */
   2532 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2533 		dtobdm = dev_name2blk;
   2534 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2535 			dtobdm++;
   2536 		}
   2537 
   2538 		/* get a vnode for the raw partition of this disk */
   2539 
   2540 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2541 		if (bdevvp(dev, &vp))
   2542 			panic("RAID can't alloc vnode");
   2543 
   2544 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2545 
   2546 		if (error) {
   2547 			/* "Who cares."  Continue looking
   2548 			   for something that exists*/
   2549 			vput(vp);
   2550 			continue;
   2551 		}
   2552 
   2553 		/* Ok, the disk exists.  Go get the disklabel. */
   2554 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2555 				  FREAD, NOCRED, 0);
   2556 		if (error) {
   2557 			/*
   2558 			 * XXX can't happen - open() would
   2559 			 * have errored out (or faked up one)
   2560 			 */
   2561 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2562 			       dv->dv_xname, 'a' + RAW_PART, error);
   2563 		}
   2564 
   2565 		/* don't need this any more.  We'll allocate it again
   2566 		   a little later if we really do... */
   2567 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2568 		vput(vp);
   2569 
   2570 		for (i=0; i < label.d_npartitions; i++) {
   2571 			/* We only support partitions marked as RAID */
   2572 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2573 				continue;
   2574 
   2575 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2576 			if (bdevvp(dev, &vp))
   2577 				panic("RAID can't alloc vnode");
   2578 
   2579 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2580 			if (error) {
   2581 				/* Whatever... */
   2582 				vput(vp);
   2583 				continue;
   2584 			}
   2585 
   2586 			good_one = 0;
   2587 
   2588 			clabel = (RF_ComponentLabel_t *)
   2589 				malloc(sizeof(RF_ComponentLabel_t),
   2590 				       M_RAIDFRAME, M_NOWAIT);
   2591 			if (clabel == NULL) {
   2592 				/* XXX CLEANUP HERE */
   2593 				printf("RAID auto config: out of memory!\n");
   2594 				return(NULL); /* XXX probably should panic? */
   2595 			}
   2596 
   2597 			if (!raidread_component_label(dev, vp, clabel)) {
   2598 				/* Got the label.  Does it look reasonable? */
   2599 				if (rf_reasonable_label(clabel) &&
   2600 				    (clabel->partitionSize <=
   2601 				     label.d_partitions[i].p_size)) {
   2602 #if DEBUG
   2603 					printf("Component on: %s%c: %d\n",
   2604 					       dv->dv_xname, 'a'+i,
   2605 					       label.d_partitions[i].p_size);
   2606 					print_component_label(clabel);
   2607 #endif
   2608 					/* if it's reasonable, add it,
   2609 					   else ignore it. */
   2610 					ac = (RF_AutoConfig_t *)
   2611 						malloc(sizeof(RF_AutoConfig_t),
   2612 						       M_RAIDFRAME,
   2613 						       M_NOWAIT);
   2614 					if (ac == NULL) {
   2615 						/* XXX should panic?? */
   2616 						return(NULL);
   2617 					}
   2618 
   2619 					sprintf(ac->devname, "%s%c",
   2620 						dv->dv_xname, 'a'+i);
   2621 					ac->dev = dev;
   2622 					ac->vp = vp;
   2623 					ac->clabel = clabel;
   2624 					ac->next = ac_list;
   2625 					ac_list = ac;
   2626 					good_one = 1;
   2627 				}
   2628 			}
   2629 			if (!good_one) {
   2630 				/* cleanup */
   2631 				free(clabel, M_RAIDFRAME);
   2632 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2633 				vput(vp);
   2634 			}
   2635 		}
   2636 	}
   2637 }
   2638 return(ac_list);
   2639 }
   2640 
   2641 static int
   2642 rf_reasonable_label(clabel)
   2643 	RF_ComponentLabel_t *clabel;
   2644 {
   2645 
   2646 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2647 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2648 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2649 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2650 	    clabel->row >=0 &&
   2651 	    clabel->column >= 0 &&
   2652 	    clabel->num_rows > 0 &&
   2653 	    clabel->num_columns > 0 &&
   2654 	    clabel->row < clabel->num_rows &&
   2655 	    clabel->column < clabel->num_columns &&
   2656 	    clabel->blockSize > 0 &&
   2657 	    clabel->numBlocks > 0) {
   2658 		/* label looks reasonable enough... */
   2659 		return(1);
   2660 	}
   2661 	return(0);
   2662 }
   2663 
   2664 
   2665 void
   2666 print_component_label(clabel)
   2667 	RF_ComponentLabel_t *clabel;
   2668 {
   2669 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2670 	       clabel->row, clabel->column,
   2671 	       clabel->num_rows, clabel->num_columns);
   2672 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2673 	       clabel->version, clabel->serial_number,
   2674 	       clabel->mod_counter);
   2675 	printf("   Clean: %s Status: %d\n",
   2676 	       clabel->clean ? "Yes" : "No", clabel->status );
   2677 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2678 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2679 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2680 	       (char) clabel->parityConfig, clabel->blockSize,
   2681 	       clabel->numBlocks);
   2682 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2683 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2684 #if 0
   2685 	   printf("   Config order: %d\n", clabel->config_order);
   2686 #endif
   2687 
   2688 }
   2689 
   2690 RF_ConfigSet_t *
   2691 rf_create_auto_sets(ac_list)
   2692 	RF_AutoConfig_t *ac_list;
   2693 {
   2694 	RF_AutoConfig_t *ac;
   2695 	RF_ConfigSet_t *config_sets;
   2696 	RF_ConfigSet_t *cset;
   2697 	RF_AutoConfig_t *ac_next;
   2698 
   2699 
   2700 	config_sets = NULL;
   2701 
   2702 	/* Go through the AutoConfig list, and figure out which components
   2703 	   belong to what sets.  */
   2704 	ac = ac_list;
   2705 	while(ac!=NULL) {
   2706 		/* we're going to putz with ac->next, so save it here
   2707 		   for use at the end of the loop */
   2708 		ac_next = ac->next;
   2709 
   2710 		if (config_sets == NULL) {
   2711 			/* will need at least this one... */
   2712 			config_sets = (RF_ConfigSet_t *)
   2713 				malloc(sizeof(RF_ConfigSet_t),
   2714 				       M_RAIDFRAME, M_NOWAIT);
   2715 			if (config_sets == NULL) {
   2716 				panic("rf_create_auto_sets: No memory!\n");
   2717 			}
   2718 			/* this one is easy :) */
   2719 			config_sets->ac = ac;
   2720 			config_sets->next = NULL;
   2721 			config_sets->rootable = 0;
   2722 			ac->next = NULL;
   2723 		} else {
   2724 			/* which set does this component fit into? */
   2725 			cset = config_sets;
   2726 			while(cset!=NULL) {
   2727 				if (rf_does_it_fit(cset, ac)) {
   2728 					/* looks like it matches */
   2729 					ac->next = cset->ac;
   2730 					cset->ac = ac;
   2731 					break;
   2732 				}
   2733 				cset = cset->next;
   2734 			}
   2735 			if (cset==NULL) {
   2736 				/* didn't find a match above... new set..*/
   2737 				cset = (RF_ConfigSet_t *)
   2738 					malloc(sizeof(RF_ConfigSet_t),
   2739 					       M_RAIDFRAME, M_NOWAIT);
   2740 				if (cset == NULL) {
   2741 					panic("rf_create_auto_sets: No memory!\n");
   2742 				}
   2743 				cset->ac = ac;
   2744 				ac->next = NULL;
   2745 				cset->next = config_sets;
   2746 				cset->rootable = 0;
   2747 				config_sets = cset;
   2748 			}
   2749 		}
   2750 		ac = ac_next;
   2751 	}
   2752 
   2753 
   2754 	return(config_sets);
   2755 }
   2756 
   2757 static int
   2758 rf_does_it_fit(cset, ac)
   2759 	RF_ConfigSet_t *cset;
   2760 	RF_AutoConfig_t *ac;
   2761 {
   2762 	RF_ComponentLabel_t *clabel1, *clabel2;
   2763 
   2764 	/* If this one matches the *first* one in the set, that's good
   2765 	   enough, since the other members of the set would have been
   2766 	   through here too... */
   2767 	/* note that we are not checking partitionSize here.. */
   2768 
   2769 	clabel1 = cset->ac->clabel;
   2770 	clabel2 = ac->clabel;
   2771 	if ((clabel1->version == clabel2->version) &&
   2772 	    (clabel1->serial_number == clabel2->serial_number) &&
   2773 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2774 	    (clabel1->num_rows == clabel2->num_rows) &&
   2775 	    (clabel1->num_columns == clabel2->num_columns) &&
   2776 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2777 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2778 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2779 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2780 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2781 	    (clabel1->blockSize == clabel2->blockSize) &&
   2782 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2783 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2784 	    (clabel1->root_partition == clabel2->root_partition) &&
   2785 	    (clabel1->last_unit == clabel2->last_unit) &&
   2786 	    (clabel1->config_order == clabel2->config_order)) {
   2787 		/* if it get's here, it almost *has* to be a match */
   2788 	} else {
   2789 		/* it's not consistent with somebody in the set..
   2790 		   punt */
   2791 		return(0);
   2792 	}
   2793 	/* all was fine.. it must fit... */
   2794 	return(1);
   2795 }
   2796 
   2797 int
   2798 rf_have_enough_components(cset)
   2799 	RF_ConfigSet_t *cset;
   2800 {
   2801 	RF_AutoConfig_t *ac;
   2802 	RF_AutoConfig_t *auto_config;
   2803 	RF_ComponentLabel_t *clabel;
   2804 	int r,c;
   2805 	int num_rows;
   2806 	int num_cols;
   2807 	int num_missing;
   2808 
   2809 	/* check to see that we have enough 'live' components
   2810 	   of this set.  If so, we can configure it if necessary */
   2811 
   2812 	num_rows = cset->ac->clabel->num_rows;
   2813 	num_cols = cset->ac->clabel->num_columns;
   2814 
   2815 	/* XXX Check for duplicate components!?!?!? */
   2816 
   2817 	num_missing = 0;
   2818 	auto_config = cset->ac;
   2819 
   2820 	for(r=0; r<num_rows; r++) {
   2821 		for(c=0; c<num_cols; c++) {
   2822 			ac = auto_config;
   2823 			while(ac!=NULL) {
   2824 				if (ac->clabel==NULL) {
   2825 					/* big-time bad news. */
   2826 					goto fail;
   2827 				}
   2828 				if ((ac->clabel->row == r) &&
   2829 				    (ac->clabel->column == c)) {
   2830 					/* it's this one... */
   2831 #if DEBUG
   2832 					printf("Found: %s at %d,%d\n",
   2833 					       ac->devname,r,c);
   2834 #endif
   2835 					break;
   2836 				}
   2837 				ac=ac->next;
   2838 			}
   2839 			if (ac==NULL) {
   2840 				/* Didn't find one here! */
   2841 				num_missing++;
   2842 			}
   2843 		}
   2844 	}
   2845 
   2846 	clabel = cset->ac->clabel;
   2847 
   2848 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2849 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2850 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2851 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2852 		/* XXX this needs to be made *much* more general */
   2853 		/* Too many failures */
   2854 		return(0);
   2855 	}
   2856 	/* otherwise, all is well, and we've got enough to take a kick
   2857 	   at autoconfiguring this set */
   2858 	return(1);
   2859 fail:
   2860 	return(0);
   2861 
   2862 }
   2863 
   2864 void
   2865 rf_create_configuration(ac,config,raidPtr)
   2866 	RF_AutoConfig_t *ac;
   2867 	RF_Config_t *config;
   2868 	RF_Raid_t *raidPtr;
   2869 {
   2870 	RF_ComponentLabel_t *clabel;
   2871 
   2872 	clabel = ac->clabel;
   2873 
   2874 	/* 1. Fill in the common stuff */
   2875 	config->numRow = clabel->num_rows;
   2876 	config->numCol = clabel->num_columns;
   2877 	config->numSpare = 0; /* XXX should this be set here? */
   2878 	config->sectPerSU = clabel->sectPerSU;
   2879 	config->SUsPerPU = clabel->SUsPerPU;
   2880 	config->SUsPerRU = clabel->SUsPerRU;
   2881 	config->parityConfig = clabel->parityConfig;
   2882 	/* XXX... */
   2883 	strcpy(config->diskQueueType,"fifo");
   2884 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2885 	config->layoutSpecificSize = 0; /* XXX ?? */
   2886 
   2887 	while(ac!=NULL) {
   2888 		/* row/col values will be in range due to the checks
   2889 		   in reasonable_label() */
   2890 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2891 		       ac->devname);
   2892 		ac = ac->next;
   2893 	}
   2894 
   2895 }
   2896 
   2897 int
   2898 rf_set_autoconfig(raidPtr, new_value)
   2899 	RF_Raid_t *raidPtr;
   2900 	int new_value;
   2901 {
   2902 	RF_ComponentLabel_t clabel;
   2903 	struct vnode *vp;
   2904 	dev_t dev;
   2905 	int row, column;
   2906 
   2907 	raidPtr->autoconfigure = new_value;
   2908 	for(row=0; row<raidPtr->numRow; row++) {
   2909 		for(column=0; column<raidPtr->numCol; column++) {
   2910 			dev = raidPtr->Disks[row][column].dev;
   2911 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2912 			raidread_component_label(dev, vp, &clabel);
   2913 			clabel.autoconfigure = new_value;
   2914 			raidwrite_component_label(dev, vp, &clabel);
   2915 		}
   2916 	}
   2917 	return(new_value);
   2918 }
   2919 
   2920 int
   2921 rf_set_rootpartition(raidPtr, new_value)
   2922 	RF_Raid_t *raidPtr;
   2923 	int new_value;
   2924 {
   2925 	RF_ComponentLabel_t clabel;
   2926 	struct vnode *vp;
   2927 	dev_t dev;
   2928 	int row, column;
   2929 
   2930 	raidPtr->root_partition = new_value;
   2931 	for(row=0; row<raidPtr->numRow; row++) {
   2932 		for(column=0; column<raidPtr->numCol; column++) {
   2933 			dev = raidPtr->Disks[row][column].dev;
   2934 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2935 			raidread_component_label(dev, vp, &clabel);
   2936 			clabel.root_partition = new_value;
   2937 			raidwrite_component_label(dev, vp, &clabel);
   2938 		}
   2939 	}
   2940 	return(new_value);
   2941 }
   2942 
   2943 void
   2944 rf_release_all_vps(cset)
   2945 	RF_ConfigSet_t *cset;
   2946 {
   2947 	RF_AutoConfig_t *ac;
   2948 
   2949 	ac = cset->ac;
   2950 	while(ac!=NULL) {
   2951 		/* Close the vp, and give it back */
   2952 		if (ac->vp) {
   2953 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2954 			vput(ac->vp);
   2955 		}
   2956 		ac = ac->next;
   2957 	}
   2958 }
   2959 
   2960 
   2961 void
   2962 rf_cleanup_config_set(cset)
   2963 	RF_ConfigSet_t *cset;
   2964 {
   2965 	RF_AutoConfig_t *ac;
   2966 	RF_AutoConfig_t *next_ac;
   2967 
   2968 	ac = cset->ac;
   2969 	while(ac!=NULL) {
   2970 		next_ac = ac->next;
   2971 		/* nuke the label */
   2972 		free(ac->clabel, M_RAIDFRAME);
   2973 		/* cleanup the config structure */
   2974 		free(ac, M_RAIDFRAME);
   2975 		/* "next.." */
   2976 		ac = next_ac;
   2977 	}
   2978 	/* and, finally, nuke the config set */
   2979 	free(cset, M_RAIDFRAME);
   2980 }
   2981 
   2982 
   2983 void
   2984 raid_init_component_label(raidPtr, clabel)
   2985 	RF_Raid_t *raidPtr;
   2986 	RF_ComponentLabel_t *clabel;
   2987 {
   2988 	/* current version number */
   2989 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   2990 	clabel->serial_number = clabel->serial_number;
   2991 	clabel->mod_counter = raidPtr->mod_counter;
   2992 	clabel->num_rows = raidPtr->numRow;
   2993 	clabel->num_columns = raidPtr->numCol;
   2994 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   2995 	clabel->status = rf_ds_optimal; /* "It's good!" */
   2996 
   2997 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   2998 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   2999 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3000 
   3001 	clabel->blockSize = raidPtr->bytesPerSector;
   3002 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3003 
   3004 	/* XXX not portable */
   3005 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3006 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3007 	clabel->autoconfigure = raidPtr->autoconfigure;
   3008 	clabel->root_partition = raidPtr->root_partition;
   3009 	clabel->last_unit = raidPtr->raidid;
   3010 	clabel->config_order = raidPtr->config_order;
   3011 }
   3012 
   3013 int
   3014 rf_auto_config_set(cset,unit)
   3015 	RF_ConfigSet_t *cset;
   3016 	int *unit;
   3017 {
   3018 	RF_Raid_t *raidPtr;
   3019 	RF_Config_t *config;
   3020 	int raidID;
   3021 	int retcode;
   3022 
   3023 	printf("Starting autoconfigure on raid%d\n",raidID);
   3024 
   3025 	retcode = 0;
   3026 	*unit = -1;
   3027 
   3028 	/* 1. Create a config structure */
   3029 
   3030 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3031 				       M_RAIDFRAME,
   3032 				       M_NOWAIT);
   3033 	if (config==NULL) {
   3034 		printf("Out of mem!?!?\n");
   3035 				/* XXX do something more intelligent here. */
   3036 		return(1);
   3037 	}
   3038 	/* XXX raidID needs to be set correctly.. */
   3039 
   3040 	/*
   3041 	   2. Figure out what RAID ID this one is supposed to live at
   3042 	   See if we can get the same RAID dev that it was configured
   3043 	   on last time..
   3044 	*/
   3045 
   3046 	raidID = cset->ac->clabel->last_unit;
   3047 	if ((raidID < 0) || (raidID >= numraid)) {
   3048 		/* let's not wander off into lala land. */
   3049 		raidID = numraid - 1;
   3050 	}
   3051 	if (raidPtrs[raidID]->valid != 0) {
   3052 
   3053 		/*
   3054 		   Nope... Go looking for an alternative...
   3055 		   Start high so we don't immediately use raid0 if that's
   3056 		   not taken.
   3057 		*/
   3058 
   3059 		for(raidID = numraid; raidID >= 0; raidID--) {
   3060 			if (raidPtrs[raidID]->valid == 0) {
   3061 				/* can use this one! */
   3062 				break;
   3063 			}
   3064 		}
   3065 	}
   3066 
   3067 	if (raidID < 0) {
   3068 		/* punt... */
   3069 		printf("Unable to auto configure this set!\n");
   3070 		printf("(Out of RAID devs!)\n");
   3071 		return(1);
   3072 	}
   3073 
   3074 	raidPtr = raidPtrs[raidID];
   3075 
   3076 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3077 	raidPtr->raidid = raidID;
   3078 	raidPtr->openings = RAIDOUTSTANDING;
   3079 
   3080 	/* 3. Build the configuration structure */
   3081 	rf_create_configuration(cset->ac, config, raidPtr);
   3082 
   3083 	/* 4. Do the configuration */
   3084 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3085 
   3086 	if (retcode == 0) {
   3087 #if DEBUG
   3088 		printf("Calling raidinit()\n");
   3089 #endif
   3090 				/* XXX the 0 below is bogus! */
   3091 		retcode = raidinit(0, raidPtrs[raidID], raidID);
   3092 		if (retcode) {
   3093 			printf("init returned: %d\n",retcode);
   3094 		}
   3095 		rf_markalldirty( raidPtrs[raidID] );
   3096 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3097 		if (cset->ac->clabel->root_partition==1) {
   3098 			/* everything configured just fine.  Make a note
   3099 			   that this set is eligible to be root. */
   3100 			cset->rootable = 1;
   3101 			/* XXX do this here? */
   3102 			raidPtrs[raidID]->root_partition = 1;
   3103 		}
   3104 	}
   3105 
   3106 	/* 5. Cleanup */
   3107 	free(config, M_RAIDFRAME);
   3108 
   3109 	*unit = raidID;
   3110 	return(retcode);
   3111 }
   3112