Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.66
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.66 2000/03/04 06:03:21 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/errno.h>
    117 #include <sys/param.h>
    118 #include <sys/pool.h>
    119 #include <sys/queue.h>
    120 #include <sys/disk.h>
    121 #include <sys/device.h>
    122 #include <sys/stat.h>
    123 #include <sys/ioctl.h>
    124 #include <sys/fcntl.h>
    125 #include <sys/systm.h>
    126 #include <sys/namei.h>
    127 #include <sys/vnode.h>
    128 #include <sys/param.h>
    129 #include <sys/types.h>
    130 #include <machine/types.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include "raid.h"
    139 #include "opt_raid_autoconfig.h"
    140 #include "rf_raid.h"
    141 #include "rf_raidframe.h"
    142 #include "rf_copyback.h"
    143 #include "rf_dag.h"
    144 #include "rf_dagflags.h"
    145 #include "rf_diskqueue.h"
    146 #include "rf_acctrace.h"
    147 #include "rf_etimer.h"
    148 #include "rf_general.h"
    149 #include "rf_debugMem.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_debugprint.h"
    155 #include "rf_threadstuff.h"
    156 #include "rf_configure.h"
    157 
/* Verbosity gate for db1_printf(): nothing is printed unless this is > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- note the doubled parentheses -- calls printf()
 * with the given argument list when DEBUG is defined and rf_kdebug_level
 * is greater than zero; without DEBUG it expands to an empty statement and
 * the arguments are never evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that the macro is
 * exactly one statement that takes its trailing semicolon.  A bare
 * "if (...) printf a" would capture a following "else", and a bare "{ }"
 * followed by ";" is a syntax error in front of an "else".
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    165 
    166 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    167 
    168 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    169 
    170 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    171 						 * spare table */
    172 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    173 						 * installation process */
    174 
    175 /* prototypes */
    176 static void KernelWakeupFunc(struct buf * bp);
    177 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    178 		   dev_t dev, RF_SectorNum_t startSect,
    179 		   RF_SectorCount_t numSect, caddr_t buf,
    180 		   void (*cbFunc) (struct buf *), void *cbArg,
    181 		   int logBytesPerSector, struct proc * b_proc);
    182 static void raidinit __P((RF_Raid_t *));
    183 
    184 void raidattach __P((int));
    185 int raidsize __P((dev_t));
    186 int raidopen __P((dev_t, int, int, struct proc *));
    187 int raidclose __P((dev_t, int, int, struct proc *));
    188 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
    189 int raidwrite __P((dev_t, struct uio *, int));
    190 int raidread __P((dev_t, struct uio *, int));
    191 void raidstrategy __P((struct buf *));
    192 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
    193 
    194 /*
    195  * Pilfered from ccd.c
    196  */
    197 
    198 struct raidbuf {
    199 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
    200 	struct buf *rf_obp;	/* ptr. to original I/O buf */
    201 	int     rf_flags;	/* misc. flags */
    202 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
    203 };
    204 
    205 
/*
 * Take a buffer from / hand a buffer back to the component buffer pool of
 * the softc `rs'.  RAIDGETBUF asks the pool with PR_NOWAIT.
 */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, (cbp))
    208 
    209 /* XXX Not sure if the following should be replacing the raidPtrs above,
    210    or if it should be used in conjunction with that...
    211 */
    212 
    213 struct raid_softc {
    214 	int     sc_flags;	/* flags */
    215 	int     sc_cflags;	/* configuration flags */
    216 	size_t  sc_size;        /* size of the raid device */
    217 	char    sc_xname[20];	/* XXX external name */
    218 	struct disk sc_dkdev;	/* generic disk device info */
    219 	struct pool sc_cbufpool;	/* component buffer pool */
    220 	struct buf_queue buf_queue;	/* used for the device queue */
    221 };
    222 /* sc_flags */
    223 #define RAIDF_INITED	0x01	/* unit has been initialized */
    224 #define RAIDF_WLABEL	0x02	/* label area is writable */
    225 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    226 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    227 #define RAIDF_LOCKED	0x80	/* unit is locked */
    228 
    229 #define	raidunit(x)	DISKUNIT(x)
    230 int numraid = 0;
    231 
/*
 * RAIDOUTSTANDING is the number of simultaneous I/Os allowed to a RAID
 * device.  Be aware that large values can let the driver consume a lot of
 * kernel memory, especially on writes and on degraded-mode reads.
 *
 * For example: with a stripe width of 64 blocks (32K) and 5 disks, a
 * single 64K write will typically require 64K for the old data, 64K for
 * the old parity, and 64K for the new parity, for a total of 192K (if the
 * parity buffer is not re-used immediately).  Even if it is re-used
 * immediately, that is still 128K, which when multiplied by say 10
 * requests is 1280K, *on top* of the 640K of incoming data.
 *
 * In degraded mode, a 64K read on the same setup may require data
 * reconstruction, which needs *all* of the 4 remaining disks to
 * participate -- 4 * 32K/disk == 128K again.
 */
#ifndef RAIDOUTSTANDING
#define	RAIDOUTSTANDING	6
#endif

/* dev_t naming the raw partition of the unit that `dev' belongs to */
#define	RAIDLABELDEV(dev)						\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    255 
    256 /* declared here, and made public, for the benefit of KVM stuff.. */
    257 struct raid_softc *raid_softc;
    258 
    259 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    260 				     struct disklabel *));
    261 static void raidgetdisklabel __P((dev_t));
    262 static void raidmakedisklabel __P((struct raid_softc *));
    263 
    264 static int raidlock __P((struct raid_softc *));
    265 static void raidunlock __P((struct raid_softc *));
    266 
    267 static void rf_markalldirty __P((RF_Raid_t *));
    268 void rf_mountroot_hook __P((struct device *));
    269 
    270 struct device *raidrootdev;
    271 
    272 void rf_ReconThread __P((struct rf_recon_req *));
    273 /* XXX what I want is: */
    274 /*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
    275 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
    276 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
    277 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
    278 void rf_buildroothack __P((void *));
    279 
    280 RF_AutoConfig_t *rf_find_raid_components __P((void));
    281 void print_component_label __P((RF_ComponentLabel_t *));
    282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
    283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
    284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
    285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
    286 				  RF_Raid_t *));
    287 int rf_set_autoconfig __P((RF_Raid_t *, int));
    288 int rf_set_rootpartition __P((RF_Raid_t *, int));
    289 void rf_release_all_vps __P((RF_ConfigSet_t *));
    290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
    291 int rf_have_enough_components __P((RF_ConfigSet_t *));
    292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
    293 
    294 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    295 				  allow autoconfig to take place.
    296 			          Note that this is overridden by having
    297 			          RAID_AUTOCONFIG as an option in the
    298 			          kernel config file.  */
    299 extern struct device *booted_device;
    300 
    301 void
    302 raidattach(num)
    303 	int     num;
    304 {
    305 	int raidID;
    306 	int i, rc;
    307 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    308 	RF_ConfigSet_t *config_sets;
    309 
    310 #ifdef DEBUG
    311 	printf("raidattach: Asked for %d units\n", num);
    312 #endif
    313 
    314 	if (num <= 0) {
    315 #ifdef DIAGNOSTIC
    316 		panic("raidattach: count <= 0");
    317 #endif
    318 		return;
    319 	}
    320 	/* This is where all the initialization stuff gets done. */
    321 
    322 	numraid = num;
    323 
    324 	/* Make some space for requested number of units... */
    325 
    326 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    327 	if (raidPtrs == NULL) {
    328 		panic("raidPtrs is NULL!!\n");
    329 	}
    330 
    331 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    332 	if (rc) {
    333 		RF_PANIC();
    334 	}
    335 
    336 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    337 
    338 	for (i = 0; i < num; i++)
    339 		raidPtrs[i] = NULL;
    340 	rc = rf_BootRaidframe();
    341 	if (rc == 0)
    342 		printf("Kernelized RAIDframe activated\n");
    343 	else
    344 		panic("Serious error booting RAID!!\n");
    345 
    346 	/* put together some datastructures like the CCD device does.. This
    347 	 * lets us lock the device and what-not when it gets opened. */
    348 
    349 	raid_softc = (struct raid_softc *)
    350 		malloc(num * sizeof(struct raid_softc),
    351 		       M_RAIDFRAME, M_NOWAIT);
    352 	if (raid_softc == NULL) {
    353 		printf("WARNING: no memory for RAIDframe driver\n");
    354 		return;
    355 	}
    356 
    357 	bzero(raid_softc, num * sizeof(struct raid_softc));
    358 
    359 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    360 					      M_RAIDFRAME, M_NOWAIT);
    361 	if (raidrootdev == NULL) {
    362 		panic("No memory for RAIDframe driver!!?!?!\n");
    363 	}
    364 
    365 	for (raidID = 0; raidID < num; raidID++) {
    366 		BUFQ_INIT(&raid_softc[raidID].buf_queue);
    367 
    368 		raidrootdev[raidID].dv_class  = DV_DISK;
    369 		raidrootdev[raidID].dv_cfdata = NULL;
    370 		raidrootdev[raidID].dv_unit   = raidID;
    371 		raidrootdev[raidID].dv_parent = NULL;
    372 		raidrootdev[raidID].dv_flags  = 0;
    373 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    374 
    375 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    376 			  (RF_Raid_t *));
    377 		if (raidPtrs[raidID] == NULL) {
    378 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    379 			numraid = raidID;
    380 			return;
    381 		}
    382 	}
    383 
    384 #if RAID_AUTOCONFIG
    385 	raidautoconfig = 1;
    386 #endif
    387 
    388 if (raidautoconfig) {
    389 	/* 1. locate all RAID components on the system */
    390 
    391 #if DEBUG
    392 	printf("Searching for raid components...\n");
    393 #endif
    394 	ac_list = rf_find_raid_components();
    395 
    396 	/* 2. sort them into their respective sets */
    397 
    398 	config_sets = rf_create_auto_sets(ac_list);
    399 
    400 	/* 3. evaluate each set and configure the valid ones
    401 	   This gets done in rf_buildroothack() */
    402 
    403 	/* schedule the creation of the thread to do the
    404 	   "/ on RAID" stuff */
    405 
    406 	kthread_create(rf_buildroothack,config_sets);
    407 
    408 #if 0
    409 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    410 #endif
    411 }
    412 
    413 }
    414 
    415 void
    416 rf_buildroothack(arg)
    417 	void *arg;
    418 {
    419 	RF_ConfigSet_t *config_sets = arg;
    420 	RF_ConfigSet_t *cset;
    421 	RF_ConfigSet_t *next_cset;
    422 	int retcode;
    423 	int raidID;
    424 	int rootID;
    425 	int num_root;
    426 
    427 	num_root = 0;
    428 	cset = config_sets;
    429 	while(cset != NULL ) {
    430 		next_cset = cset->next;
    431 		if (rf_have_enough_components(cset) &&
    432 		    cset->ac->clabel->autoconfigure==1) {
    433 			retcode = rf_auto_config_set(cset,&raidID);
    434 			if (!retcode) {
    435 				if (cset->rootable) {
    436 					rootID = raidID;
    437 					num_root++;
    438 				}
    439 			} else {
    440 				/* The autoconfig didn't work :( */
    441 #if DEBUG
    442 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    443 #endif
    444 				rf_release_all_vps(cset);
    445 			}
    446 		} else {
    447 			/* we're not autoconfiguring this set...
    448 			   release the associated resources */
    449 			rf_release_all_vps(cset);
    450 		}
    451 		/* cleanup */
    452 		rf_cleanup_config_set(cset);
    453 		cset = next_cset;
    454 	}
    455 	if (boothowto & RB_ASKNAME) {
    456 		/* We don't auto-config... */
    457 	} else {
    458 		/* They didn't ask, and we found something bootable... */
    459 
    460 		if (num_root == 1) {
    461 			booted_device = &raidrootdev[rootID];
    462 		} else if (num_root > 1) {
    463 			/* we can't guess.. require the user to answer... */
    464 			boothowto |= RB_ASKNAME;
    465 		}
    466 	}
    467 }
    468 
    469 
    470 int
    471 raidsize(dev)
    472 	dev_t   dev;
    473 {
    474 	struct raid_softc *rs;
    475 	struct disklabel *lp;
    476 	int     part, unit, omask, size;
    477 
    478 	unit = raidunit(dev);
    479 	if (unit >= numraid)
    480 		return (-1);
    481 	rs = &raid_softc[unit];
    482 
    483 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    484 		return (-1);
    485 
    486 	part = DISKPART(dev);
    487 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    488 	lp = rs->sc_dkdev.dk_label;
    489 
    490 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    491 		return (-1);
    492 
    493 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    494 		size = -1;
    495 	else
    496 		size = lp->d_partitions[part].p_size *
    497 		    (lp->d_secsize / DEV_BSIZE);
    498 
    499 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    500 		return (-1);
    501 
    502 	return (size);
    503 
    504 }
    505 
    506 int
    507 raiddump(dev, blkno, va, size)
    508 	dev_t   dev;
    509 	daddr_t blkno;
    510 	caddr_t va;
    511 	size_t  size;
    512 {
    513 	/* Not implemented. */
    514 	return ENXIO;
    515 }
    516 /* ARGSUSED */
    517 int
    518 raidopen(dev, flags, fmt, p)
    519 	dev_t   dev;
    520 	int     flags, fmt;
    521 	struct proc *p;
    522 {
    523 	int     unit = raidunit(dev);
    524 	struct raid_softc *rs;
    525 	struct disklabel *lp;
    526 	int     part, pmask;
    527 	int     error = 0;
    528 
    529 	if (unit >= numraid)
    530 		return (ENXIO);
    531 	rs = &raid_softc[unit];
    532 
    533 	if ((error = raidlock(rs)) != 0)
    534 		return (error);
    535 	lp = rs->sc_dkdev.dk_label;
    536 
    537 	part = DISKPART(dev);
    538 	pmask = (1 << part);
    539 
    540 	db1_printf(("Opening raid device number: %d partition: %d\n",
    541 		unit, part));
    542 
    543 
    544 	if ((rs->sc_flags & RAIDF_INITED) &&
    545 	    (rs->sc_dkdev.dk_openmask == 0))
    546 		raidgetdisklabel(dev);
    547 
    548 	/* make sure that this partition exists */
    549 
    550 	if (part != RAW_PART) {
    551 		db1_printf(("Not a raw partition..\n"));
    552 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    553 		    ((part >= lp->d_npartitions) ||
    554 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    555 			error = ENXIO;
    556 			raidunlock(rs);
    557 			db1_printf(("Bailing out...\n"));
    558 			return (error);
    559 		}
    560 	}
    561 	/* Prevent this unit from being unconfigured while open. */
    562 	switch (fmt) {
    563 	case S_IFCHR:
    564 		rs->sc_dkdev.dk_copenmask |= pmask;
    565 		break;
    566 
    567 	case S_IFBLK:
    568 		rs->sc_dkdev.dk_bopenmask |= pmask;
    569 		break;
    570 	}
    571 
    572 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    573 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    574 		/* First one... mark things as dirty... Note that we *MUST*
    575 		 have done a configure before this.  I DO NOT WANT TO BE
    576 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    577 		 THAT THEY BELONG TOGETHER!!!!! */
    578 		/* XXX should check to see if we're only open for reading
    579 		   here... If so, we needn't do this, but then need some
    580 		   other way of keeping track of what's happened.. */
    581 
    582 		rf_markalldirty( raidPtrs[unit] );
    583 	}
    584 
    585 
    586 	rs->sc_dkdev.dk_openmask =
    587 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    588 
    589 	raidunlock(rs);
    590 
    591 	return (error);
    592 
    593 
    594 }
    595 /* ARGSUSED */
    596 int
    597 raidclose(dev, flags, fmt, p)
    598 	dev_t   dev;
    599 	int     flags, fmt;
    600 	struct proc *p;
    601 {
    602 	int     unit = raidunit(dev);
    603 	struct raid_softc *rs;
    604 	int     error = 0;
    605 	int     part;
    606 
    607 	if (unit >= numraid)
    608 		return (ENXIO);
    609 	rs = &raid_softc[unit];
    610 
    611 	if ((error = raidlock(rs)) != 0)
    612 		return (error);
    613 
    614 	part = DISKPART(dev);
    615 
    616 	/* ...that much closer to allowing unconfiguration... */
    617 	switch (fmt) {
    618 	case S_IFCHR:
    619 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    620 		break;
    621 
    622 	case S_IFBLK:
    623 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    624 		break;
    625 	}
    626 	rs->sc_dkdev.dk_openmask =
    627 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    628 
    629 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    630 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    631 		/* Last one... device is not unconfigured yet.
    632 		   Device shutdown has taken care of setting the
    633 		   clean bits if RAIDF_INITED is not set
    634 		   mark things as clean... */
    635 #if 0
    636 		printf("Last one on raid%d.  Updating status.\n",unit);
    637 #endif
    638 		rf_final_update_component_labels( raidPtrs[unit] );
    639 	}
    640 
    641 	raidunlock(rs);
    642 	return (0);
    643 
    644 }
    645 
    646 void
    647 raidstrategy(bp)
    648 	register struct buf *bp;
    649 {
    650 	register int s;
    651 
    652 	unsigned int raidID = raidunit(bp->b_dev);
    653 	RF_Raid_t *raidPtr;
    654 	struct raid_softc *rs = &raid_softc[raidID];
    655 	struct disklabel *lp;
    656 	int     wlabel;
    657 
    658 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    659 		bp->b_error = ENXIO;
    660 		bp->b_flags = B_ERROR;
    661 		bp->b_resid = bp->b_bcount;
    662 		biodone(bp);
    663 		return;
    664 	}
    665 	if (raidID >= numraid || !raidPtrs[raidID]) {
    666 		bp->b_error = ENODEV;
    667 		bp->b_flags |= B_ERROR;
    668 		bp->b_resid = bp->b_bcount;
    669 		biodone(bp);
    670 		return;
    671 	}
    672 	raidPtr = raidPtrs[raidID];
    673 	if (!raidPtr->valid) {
    674 		bp->b_error = ENODEV;
    675 		bp->b_flags |= B_ERROR;
    676 		bp->b_resid = bp->b_bcount;
    677 		biodone(bp);
    678 		return;
    679 	}
    680 	if (bp->b_bcount == 0) {
    681 		db1_printf(("b_bcount is zero..\n"));
    682 		biodone(bp);
    683 		return;
    684 	}
    685 	lp = rs->sc_dkdev.dk_label;
    686 
    687 	/*
    688 	 * Do bounds checking and adjust transfer.  If there's an
    689 	 * error, the bounds check will flag that for us.
    690 	 */
    691 
    692 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    693 	if (DISKPART(bp->b_dev) != RAW_PART)
    694 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    695 			db1_printf(("Bounds check failed!!:%d %d\n",
    696 				(int) bp->b_blkno, (int) wlabel));
    697 			biodone(bp);
    698 			return;
    699 		}
    700 	s = splbio();
    701 
    702 	bp->b_resid = 0;
    703 
    704 	/* stuff it onto our queue */
    705 	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
    706 
    707 	raidstart(raidPtrs[raidID]);
    708 
    709 	splx(s);
    710 }
    711 /* ARGSUSED */
    712 int
    713 raidread(dev, uio, flags)
    714 	dev_t   dev;
    715 	struct uio *uio;
    716 	int     flags;
    717 {
    718 	int     unit = raidunit(dev);
    719 	struct raid_softc *rs;
    720 	int     part;
    721 
    722 	if (unit >= numraid)
    723 		return (ENXIO);
    724 	rs = &raid_softc[unit];
    725 
    726 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    727 		return (ENXIO);
    728 	part = DISKPART(dev);
    729 
    730 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    731 
    732 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    733 
    734 }
    735 /* ARGSUSED */
    736 int
    737 raidwrite(dev, uio, flags)
    738 	dev_t   dev;
    739 	struct uio *uio;
    740 	int     flags;
    741 {
    742 	int     unit = raidunit(dev);
    743 	struct raid_softc *rs;
    744 
    745 	if (unit >= numraid)
    746 		return (ENXIO);
    747 	rs = &raid_softc[unit];
    748 
    749 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    750 		return (ENXIO);
    751 	db1_printf(("raidwrite\n"));
    752 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    753 
    754 }
    755 
    756 int
    757 raidioctl(dev, cmd, data, flag, p)
    758 	dev_t   dev;
    759 	u_long  cmd;
    760 	caddr_t data;
    761 	int     flag;
    762 	struct proc *p;
    763 {
    764 	int     unit = raidunit(dev);
    765 	int     error = 0;
    766 	int     part, pmask;
    767 	struct raid_softc *rs;
    768 	RF_Config_t *k_cfg, *u_cfg;
    769 	RF_Raid_t *raidPtr;
    770 	RF_RaidDisk_t *diskPtr;
    771 	RF_AccTotals_t *totals;
    772 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    773 	u_char *specific_buf;
    774 	int retcode = 0;
    775 	int row;
    776 	int column;
    777 	struct rf_recon_req *rrcopy, *rr;
    778 	RF_ComponentLabel_t *clabel;
    779 	RF_ComponentLabel_t ci_label;
    780 	RF_ComponentLabel_t **clabel_ptr;
    781 	RF_SingleComponent_t *sparePtr,*componentPtr;
    782 	RF_SingleComponent_t hot_spare;
    783 	RF_SingleComponent_t component;
    784 	int i, j, d;
    785 
    786 	if (unit >= numraid)
    787 		return (ENXIO);
    788 	rs = &raid_softc[unit];
    789 	raidPtr = raidPtrs[unit];
    790 
    791 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    792 		(int) DISKPART(dev), (int) unit, (int) cmd));
    793 
    794 	/* Must be open for writes for these commands... */
    795 	switch (cmd) {
    796 	case DIOCSDINFO:
    797 	case DIOCWDINFO:
    798 	case DIOCWLABEL:
    799 		if ((flag & FWRITE) == 0)
    800 			return (EBADF);
    801 	}
    802 
    803 	/* Must be initialized for these... */
    804 	switch (cmd) {
    805 	case DIOCGDINFO:
    806 	case DIOCSDINFO:
    807 	case DIOCWDINFO:
    808 	case DIOCGPART:
    809 	case DIOCWLABEL:
    810 	case DIOCGDEFLABEL:
    811 	case RAIDFRAME_SHUTDOWN:
    812 	case RAIDFRAME_REWRITEPARITY:
    813 	case RAIDFRAME_GET_INFO:
    814 	case RAIDFRAME_RESET_ACCTOTALS:
    815 	case RAIDFRAME_GET_ACCTOTALS:
    816 	case RAIDFRAME_KEEP_ACCTOTALS:
    817 	case RAIDFRAME_GET_SIZE:
    818 	case RAIDFRAME_FAIL_DISK:
    819 	case RAIDFRAME_COPYBACK:
    820 	case RAIDFRAME_CHECK_RECON_STATUS:
    821 	case RAIDFRAME_GET_COMPONENT_LABEL:
    822 	case RAIDFRAME_SET_COMPONENT_LABEL:
    823 	case RAIDFRAME_ADD_HOT_SPARE:
    824 	case RAIDFRAME_REMOVE_HOT_SPARE:
    825 	case RAIDFRAME_INIT_LABELS:
    826 	case RAIDFRAME_REBUILD_IN_PLACE:
    827 	case RAIDFRAME_CHECK_PARITY:
    828 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    829 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    830 	case RAIDFRAME_SET_AUTOCONFIG:
    831 	case RAIDFRAME_SET_ROOT:
    832 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    833 			return (ENXIO);
    834 	}
    835 
    836 	switch (cmd) {
    837 
    838 		/* configure the system */
    839 	case RAIDFRAME_CONFIGURE:
    840 
    841 		if (raidPtr->valid) {
    842 			/* There is a valid RAID set running on this unit! */
    843 			printf("raid%d: Device already configured!\n",unit);
    844 			return(EINVAL);
    845 		}
    846 
    847 		/* copy-in the configuration information */
    848 		/* data points to a pointer to the configuration structure */
    849 
    850 		u_cfg = *((RF_Config_t **) data);
    851 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    852 		if (k_cfg == NULL) {
    853 			return (ENOMEM);
    854 		}
    855 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    856 		    sizeof(RF_Config_t));
    857 		if (retcode) {
    858 			RF_Free(k_cfg, sizeof(RF_Config_t));
    859 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    860 				retcode));
    861 			return (retcode);
    862 		}
    863 		/* allocate a buffer for the layout-specific data, and copy it
    864 		 * in */
    865 		if (k_cfg->layoutSpecificSize) {
    866 			if (k_cfg->layoutSpecificSize > 10000) {
    867 				/* sanity check */
    868 				RF_Free(k_cfg, sizeof(RF_Config_t));
    869 				return (EINVAL);
    870 			}
    871 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    872 			    (u_char *));
    873 			if (specific_buf == NULL) {
    874 				RF_Free(k_cfg, sizeof(RF_Config_t));
    875 				return (ENOMEM);
    876 			}
    877 			retcode = copyin(k_cfg->layoutSpecific,
    878 			    (caddr_t) specific_buf,
    879 			    k_cfg->layoutSpecificSize);
    880 			if (retcode) {
    881 				RF_Free(k_cfg, sizeof(RF_Config_t));
    882 				RF_Free(specific_buf,
    883 					k_cfg->layoutSpecificSize);
    884 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    885 					retcode));
    886 				return (retcode);
    887 			}
    888 		} else
    889 			specific_buf = NULL;
    890 		k_cfg->layoutSpecific = specific_buf;
    891 
    892 		/* should do some kind of sanity check on the configuration.
    893 		 * Store the sum of all the bytes in the last byte? */
    894 
    895 		/* configure the system */
    896 
    897 		/*
    898 		 * Clear the entire RAID descriptor, just to make sure
    899 		 *  there is no stale data left in the case of a
    900 		 *  reconfiguration
    901 		 */
    902 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
    903 		raidPtr->raidid = unit;
    904 
    905 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    906 
    907 		if (retcode == 0) {
    908 
    909 			/* allow this many simultaneous IO's to
    910 			   this RAID device */
    911 			raidPtr->openings = RAIDOUTSTANDING;
    912 
    913 			raidinit(raidPtr);
    914 			rf_markalldirty(raidPtr);
    915 		}
    916 		/* free the buffers.  No return code here. */
    917 		if (k_cfg->layoutSpecificSize) {
    918 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    919 		}
    920 		RF_Free(k_cfg, sizeof(RF_Config_t));
    921 
    922 		return (retcode);
    923 
    924 		/* shutdown the system */
    925 	case RAIDFRAME_SHUTDOWN:
    926 
    927 		if ((error = raidlock(rs)) != 0)
    928 			return (error);
    929 
    930 		/*
    931 		 * If somebody has a partition mounted, we shouldn't
    932 		 * shutdown.
    933 		 */
    934 
    935 		part = DISKPART(dev);
    936 		pmask = (1 << part);
    937 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    938 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    939 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    940 			raidunlock(rs);
    941 			return (EBUSY);
    942 		}
    943 
    944 		retcode = rf_Shutdown(raidPtr);
    945 
    946 		pool_destroy(&rs->sc_cbufpool);
    947 
    948 		/* It's no longer initialized... */
    949 		rs->sc_flags &= ~RAIDF_INITED;
    950 
    951 		/* Detach the disk. */
    952 		disk_detach(&rs->sc_dkdev);
    953 
    954 		raidunlock(rs);
    955 
    956 		return (retcode);
    957 	case RAIDFRAME_GET_COMPONENT_LABEL:
    958 		clabel_ptr = (RF_ComponentLabel_t **) data;
    959 		/* need to read the component label for the disk indicated
    960 		   by row,column in clabel */
    961 
     962 		/* For practice, let's get it directly from disk, rather
     963 		   than from the in-core copy */
    964 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    965 			   (RF_ComponentLabel_t *));
    966 		if (clabel == NULL)
    967 			return (ENOMEM);
    968 
    969 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
    970 
    971 		retcode = copyin( *clabel_ptr, clabel,
    972 				  sizeof(RF_ComponentLabel_t));
    973 
    974 		if (retcode) {
    975 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    976 			return(retcode);
    977 		}
    978 
    979 		row = clabel->row;
    980 		column = clabel->column;
    981 
    982 		if ((row < 0) || (row >= raidPtr->numRow) ||
    983 		    (column < 0) || (column >= raidPtr->numCol)) {
    984 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    985 			return(EINVAL);
    986 		}
    987 
    988 		raidread_component_label(raidPtr->Disks[row][column].dev,
    989 				raidPtr->raid_cinfo[row][column].ci_vp,
    990 				clabel );
    991 
    992 		retcode = copyout((caddr_t) clabel,
    993 				  (caddr_t) *clabel_ptr,
    994 				  sizeof(RF_ComponentLabel_t));
    995 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
    996 		return (retcode);
    997 
    998 	case RAIDFRAME_SET_COMPONENT_LABEL:
    999 		clabel = (RF_ComponentLabel_t *) data;
   1000 
   1001 		/* XXX check the label for valid stuff... */
   1002 		/* Note that some things *should not* get modified --
   1003 		   the user should be re-initing the labels instead of
   1004 		   trying to patch things.
   1005 		   */
   1006 
   1007 		printf("Got component label:\n");
   1008 		printf("Version: %d\n",clabel->version);
   1009 		printf("Serial Number: %d\n",clabel->serial_number);
   1010 		printf("Mod counter: %d\n",clabel->mod_counter);
   1011 		printf("Row: %d\n", clabel->row);
   1012 		printf("Column: %d\n", clabel->column);
   1013 		printf("Num Rows: %d\n", clabel->num_rows);
   1014 		printf("Num Columns: %d\n", clabel->num_columns);
   1015 		printf("Clean: %d\n", clabel->clean);
   1016 		printf("Status: %d\n", clabel->status);
   1017 
   1018 		row = clabel->row;
   1019 		column = clabel->column;
   1020 
   1021 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1022 		    (column < 0) || (column >= raidPtr->numCol)) {
   1023 			return(EINVAL);
   1024 		}
   1025 
   1026 		/* XXX this isn't allowed to do anything for now :-) */
   1027 
   1028 		/* XXX and before it is, we need to fill in the rest
   1029 		   of the fields!?!?!?! */
   1030 #if 0
   1031 		raidwrite_component_label(
   1032                             raidPtr->Disks[row][column].dev,
   1033 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1034 			    clabel );
   1035 #endif
   1036 		return (0);
   1037 
   1038 	case RAIDFRAME_INIT_LABELS:
   1039 		clabel = (RF_ComponentLabel_t *) data;
   1040 		/*
   1041 		   we only want the serial number from
   1042 		   the above.  We get all the rest of the information
   1043 		   from the config that was used to create this RAID
   1044 		   set.
   1045 		   */
   1046 
   1047 		raidPtr->serial_number = clabel->serial_number;
   1048 
   1049 		raid_init_component_label(raidPtr, &ci_label);
   1050 		ci_label.serial_number = clabel->serial_number;
   1051 
   1052 		for(row=0;row<raidPtr->numRow;row++) {
   1053 			ci_label.row = row;
   1054 			for(column=0;column<raidPtr->numCol;column++) {
   1055 				diskPtr = &raidPtr->Disks[row][column];
   1056 				ci_label.partitionSize = diskPtr->partitionSize;
   1057 				ci_label.column = column;
   1058 				raidwrite_component_label(
   1059 				  raidPtr->Disks[row][column].dev,
   1060 				  raidPtr->raid_cinfo[row][column].ci_vp,
   1061 				  &ci_label );
   1062 			}
   1063 		}
   1064 
   1065 		return (retcode);
   1066 	case RAIDFRAME_SET_AUTOCONFIG:
   1067 		d = rf_set_autoconfig(raidPtr, *data);
   1068 		printf("New autoconfig value is: %d\n", d);
   1069 		*data = d;
   1070 		return (retcode);
   1071 
   1072 	case RAIDFRAME_SET_ROOT:
   1073 		d = rf_set_rootpartition(raidPtr, *data);
   1074 		printf("New rootpartition value is: %d\n", d);
   1075 		*data = d;
   1076 		return (retcode);
   1077 
   1078 		/* initialize all parity */
   1079 	case RAIDFRAME_REWRITEPARITY:
   1080 
   1081 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1082 			/* Parity for RAID 0 is trivially correct */
   1083 			raidPtr->parity_good = RF_RAID_CLEAN;
   1084 			return(0);
   1085 		}
   1086 
   1087 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1088 			/* Re-write is already in progress! */
   1089 			return(EINVAL);
   1090 		}
   1091 
   1092 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1093 					   rf_RewriteParityThread,
   1094 					   raidPtr,"raid_parity");
   1095 		return (retcode);
   1096 
   1097 
   1098 	case RAIDFRAME_ADD_HOT_SPARE:
   1099 		sparePtr = (RF_SingleComponent_t *) data;
   1100 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1101 		printf("Adding spare\n");
   1102 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1103 		return(retcode);
   1104 
   1105 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1106 		return(retcode);
   1107 
   1108 	case RAIDFRAME_REBUILD_IN_PLACE:
   1109 
   1110 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1111 			/* Can't do this on a RAID 0!! */
   1112 			return(EINVAL);
   1113 		}
   1114 
   1115 		if (raidPtr->recon_in_progress == 1) {
   1116 			/* a reconstruct is already in progress! */
   1117 			return(EINVAL);
   1118 		}
   1119 
   1120 		componentPtr = (RF_SingleComponent_t *) data;
   1121 		memcpy( &component, componentPtr,
   1122 			sizeof(RF_SingleComponent_t));
   1123 		row = component.row;
   1124 		column = component.column;
   1125 		printf("Rebuild: %d %d\n",row, column);
   1126 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1127 		    (column < 0) || (column >= raidPtr->numCol)) {
   1128 			return(EINVAL);
   1129 		}
   1130 
   1131 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1132 		if (rrcopy == NULL)
   1133 			return(ENOMEM);
   1134 
   1135 		rrcopy->raidPtr = (void *) raidPtr;
   1136 		rrcopy->row = row;
   1137 		rrcopy->col = column;
   1138 
   1139 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1140 					   rf_ReconstructInPlaceThread,
   1141 					   rrcopy,"raid_reconip");
   1142 		return(retcode);
   1143 
   1144 	case RAIDFRAME_GET_INFO:
   1145 		if (!raidPtr->valid)
   1146 			return (ENODEV);
   1147 		ucfgp = (RF_DeviceConfig_t **) data;
   1148 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1149 			  (RF_DeviceConfig_t *));
   1150 		if (d_cfg == NULL)
   1151 			return (ENOMEM);
   1152 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
   1153 		d_cfg->rows = raidPtr->numRow;
   1154 		d_cfg->cols = raidPtr->numCol;
   1155 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1156 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1157 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1158 			return (ENOMEM);
   1159 		}
   1160 		d_cfg->nspares = raidPtr->numSpare;
   1161 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1162 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1163 			return (ENOMEM);
   1164 		}
   1165 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1166 		d = 0;
   1167 		for (i = 0; i < d_cfg->rows; i++) {
   1168 			for (j = 0; j < d_cfg->cols; j++) {
   1169 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1170 				d++;
   1171 			}
   1172 		}
   1173 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1174 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1175 		}
   1176 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1177 				  sizeof(RF_DeviceConfig_t));
   1178 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1179 
   1180 		return (retcode);
   1181 
   1182 	case RAIDFRAME_CHECK_PARITY:
   1183 		*(int *) data = raidPtr->parity_good;
   1184 		return (0);
   1185 
   1186 	case RAIDFRAME_RESET_ACCTOTALS:
   1187 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
   1188 		return (0);
   1189 
   1190 	case RAIDFRAME_GET_ACCTOTALS:
   1191 		totals = (RF_AccTotals_t *) data;
   1192 		*totals = raidPtr->acc_totals;
   1193 		return (0);
   1194 
   1195 	case RAIDFRAME_KEEP_ACCTOTALS:
   1196 		raidPtr->keep_acc_totals = *(int *)data;
   1197 		return (0);
   1198 
   1199 	case RAIDFRAME_GET_SIZE:
   1200 		*(int *) data = raidPtr->totalSectors;
   1201 		return (0);
   1202 
   1203 		/* fail a disk & optionally start reconstruction */
   1204 	case RAIDFRAME_FAIL_DISK:
   1205 
   1206 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1207 			/* Can't do this on a RAID 0!! */
   1208 			return(EINVAL);
   1209 		}
   1210 
   1211 		rr = (struct rf_recon_req *) data;
   1212 
   1213 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1214 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1215 			return (EINVAL);
   1216 
   1217 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1218 		       unit, rr->row, rr->col);
   1219 
   1220 		/* make a copy of the recon request so that we don't rely on
   1221 		 * the user's buffer */
   1222 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1223 		if (rrcopy == NULL)
   1224 			return(ENOMEM);
   1225 		bcopy(rr, rrcopy, sizeof(*rr));
   1226 		rrcopy->raidPtr = (void *) raidPtr;
   1227 
   1228 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1229 					   rf_ReconThread,
   1230 					   rrcopy,"raid_recon");
   1231 		return (0);
   1232 
   1233 		/* invoke a copyback operation after recon on whatever disk
   1234 		 * needs it, if any */
   1235 	case RAIDFRAME_COPYBACK:
   1236 
   1237 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1238 			/* This makes no sense on a RAID 0!! */
   1239 			return(EINVAL);
   1240 		}
   1241 
   1242 		if (raidPtr->copyback_in_progress == 1) {
   1243 			/* Copyback is already in progress! */
   1244 			return(EINVAL);
   1245 		}
   1246 
   1247 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1248 					   rf_CopybackThread,
   1249 					   raidPtr,"raid_copyback");
   1250 		return (retcode);
   1251 
   1252 		/* return the percentage completion of reconstruction */
   1253 	case RAIDFRAME_CHECK_RECON_STATUS:
   1254 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1255 			/* This makes no sense on a RAID 0 */
   1256 			return(EINVAL);
   1257 		}
   1258 		row = 0; /* XXX we only consider a single row... */
   1259 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1260 			*(int *) data = 100;
   1261 		else
   1262 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1263 		return (0);
   1264 
   1265 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1266 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1267 			/* This makes no sense on a RAID 0 */
   1268 			return(EINVAL);
   1269 		}
   1270 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1271 			*(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
   1272 		} else {
   1273 			*(int *) data = 100;
   1274 		}
   1275 		return (0);
   1276 
   1277 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1278 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1279 			/* This makes no sense on a RAID 0 */
   1280 			return(EINVAL);
   1281 		}
   1282 		if (raidPtr->copyback_in_progress == 1) {
   1283 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1284 				raidPtr->Layout.numStripe;
   1285 		} else {
   1286 			*(int *) data = 100;
   1287 		}
   1288 		return (0);
   1289 
   1290 
   1291 		/* the sparetable daemon calls this to wait for the kernel to
   1292 		 * need a spare table. this ioctl does not return until a
   1293 		 * spare table is needed. XXX -- calling mpsleep here in the
   1294 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1295 		 * -- I should either compute the spare table in the kernel,
   1296 		 * or have a different -- XXX XXX -- interface (a different
   1297 		 * character device) for delivering the table     -- XXX */
   1298 #if 0
   1299 	case RAIDFRAME_SPARET_WAIT:
   1300 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1301 		while (!rf_sparet_wait_queue)
   1302 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1303 		waitreq = rf_sparet_wait_queue;
   1304 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1305 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1306 
   1307 		/* structure assignment */
   1308 		*((RF_SparetWait_t *) data) = *waitreq;
   1309 
   1310 		RF_Free(waitreq, sizeof(*waitreq));
   1311 		return (0);
   1312 
   1313 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1314 		 * code in it that will cause the daemon to exit */
   1315 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1316 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1317 		waitreq->fcol = -1;
   1318 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1319 		waitreq->next = rf_sparet_wait_queue;
   1320 		rf_sparet_wait_queue = waitreq;
   1321 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1322 		wakeup(&rf_sparet_wait_queue);
   1323 		return (0);
   1324 
   1325 		/* used by the spare table daemon to deliver a spare table
   1326 		 * into the kernel */
   1327 	case RAIDFRAME_SEND_SPARET:
   1328 
   1329 		/* install the spare table */
   1330 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1331 
   1332 		/* respond to the requestor.  the return status of the spare
   1333 		 * table installation is passed in the "fcol" field */
   1334 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1335 		waitreq->fcol = retcode;
   1336 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1337 		waitreq->next = rf_sparet_resp_queue;
   1338 		rf_sparet_resp_queue = waitreq;
   1339 		wakeup(&rf_sparet_resp_queue);
   1340 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1341 
   1342 		return (retcode);
   1343 #endif
   1344 
   1345 	default:
   1346 		break; /* fall through to the os-specific code below */
   1347 
   1348 	}
   1349 
   1350 	if (!raidPtr->valid)
   1351 		return (EINVAL);
   1352 
   1353 	/*
   1354 	 * Add support for "regular" device ioctls here.
   1355 	 */
   1356 
   1357 	switch (cmd) {
   1358 	case DIOCGDINFO:
   1359 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1360 		break;
   1361 
   1362 	case DIOCGPART:
   1363 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1364 		((struct partinfo *) data)->part =
   1365 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1366 		break;
   1367 
   1368 	case DIOCWDINFO:
   1369 	case DIOCSDINFO:
   1370 		if ((error = raidlock(rs)) != 0)
   1371 			return (error);
   1372 
   1373 		rs->sc_flags |= RAIDF_LABELLING;
   1374 
   1375 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1376 		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
   1377 		if (error == 0) {
   1378 			if (cmd == DIOCWDINFO)
   1379 				error = writedisklabel(RAIDLABELDEV(dev),
   1380 				    raidstrategy, rs->sc_dkdev.dk_label,
   1381 				    rs->sc_dkdev.dk_cpulabel);
   1382 		}
   1383 		rs->sc_flags &= ~RAIDF_LABELLING;
   1384 
   1385 		raidunlock(rs);
   1386 
   1387 		if (error)
   1388 			return (error);
   1389 		break;
   1390 
   1391 	case DIOCWLABEL:
   1392 		if (*(int *) data != 0)
   1393 			rs->sc_flags |= RAIDF_WLABEL;
   1394 		else
   1395 			rs->sc_flags &= ~RAIDF_WLABEL;
   1396 		break;
   1397 
   1398 	case DIOCGDEFLABEL:
   1399 		raidgetdefaultlabel(raidPtr, rs,
   1400 		    (struct disklabel *) data);
   1401 		break;
   1402 
   1403 	default:
   1404 		retcode = ENOTTY;
   1405 	}
   1406 	return (retcode);
   1407 
   1408 }
   1409 
   1410 
   1411 /* raidinit -- complete the rest of the initialization for the
   1412    RAIDframe device.  */
   1413 
   1414 
   1415 static void
   1416 raidinit(raidPtr)
   1417 	RF_Raid_t *raidPtr;
   1418 {
   1419 	struct raid_softc *rs;
   1420 	int     unit;
   1421 
   1422 	unit = raidPtr->raidid;
   1423 
   1424 	rs = &raid_softc[unit];
   1425 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
   1426 		  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
   1427 
   1428 
   1429 	/* XXX should check return code first... */
   1430 	rs->sc_flags |= RAIDF_INITED;
   1431 
   1432 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1433 
   1434 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1435 
   1436 	/* disk_attach actually creates space for the CPU disklabel, among
   1437 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1438 	 * with disklabels. */
   1439 
   1440 	disk_attach(&rs->sc_dkdev);
   1441 
   1442 	/* XXX There may be a weird interaction here between this, and
   1443 	 * protectedSectors, as used in RAIDframe.  */
   1444 
   1445 	rs->sc_size = raidPtr->totalSectors;
   1446 
   1447 }
   1448 
   1449 /* wake up the daemon & tell it to get us a spare table
   1450  * XXX
   1451  * the entries in the queues should be tagged with the raidPtr
   1452  * so that in the extremely rare case that two recons happen at once,
   1453  * we know for which device were requesting a spare table
   1454  * XXX
   1455  *
   1456  * XXX This code is not currently used. GO
   1457  */
   1458 int
   1459 rf_GetSpareTableFromDaemon(req)
   1460 	RF_SparetWait_t *req;
   1461 {
   1462 	int     retcode;
   1463 
   1464 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1465 	req->next = rf_sparet_wait_queue;
   1466 	rf_sparet_wait_queue = req;
   1467 	wakeup(&rf_sparet_wait_queue);
   1468 
   1469 	/* mpsleep unlocks the mutex */
   1470 	while (!rf_sparet_resp_queue) {
   1471 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1472 		    "raidframe getsparetable", 0);
   1473 	}
   1474 	req = rf_sparet_resp_queue;
   1475 	rf_sparet_resp_queue = req->next;
   1476 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1477 
   1478 	retcode = req->fcol;
   1479 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1480 					 * alloc'd */
   1481 	return (retcode);
   1482 }
   1483 
    1484 /* a wrapper around rf_DoAccess that extracts appropriate info from the
    1485  * bp & passes it down.
    1486  * any calls originating in the kernel must use non-blocking I/O
    1487  * do some extra sanity checking to return "appropriate" error values for
    1488  * certain conditions (to make some standard utilities work)
    1489  *
    1490  * Formerly known as: rf_DoAccessKernel
          *
          * Pulls bufs off rs->buf_queue (rs being the softc for
          * raidPtr->raidid) for as long as raidPtr->openings is positive and
          * passes each one to rf_DoAccess().  A buf whose range ends past
          * raidPtr->totalSectors (or wraps) is completed with ENOSPC; a buf
          * whose byte count has bits set in raidPtr->sectorMask is completed
          * with EINVAL.  Returns nothing; it stops when the queue is empty or
          * no openings are left.
    1491  */
    1492 void
    1493 raidstart(raidPtr)
    1494 	RF_Raid_t *raidPtr;
    1495 {
    1496 	RF_SectorCount_t num_blocks, pb, sum;
    1497 	RF_RaidAddr_t raid_addr;
    1498 	int     retcode;
    1499 	struct partition *pp;
    1500 	daddr_t blocknum;
    1501 	int     unit;
    1502 	struct raid_softc *rs;
    1503 	int     do_async;
    1504 	struct buf *bp;
    1505 
    1506 	unit = raidPtr->raidid;
    1507 	rs = &raid_softc[unit];
    1508 
    1509 	/* quick check to see if anything has died recently */
         	/* at most one pending failure is consumed per call: the labels
         	 * are updated and numNewFailures is decremented once */
    1510 	RF_LOCK_MUTEX(raidPtr->mutex);
    1511 	if (raidPtr->numNewFailures > 0) {
    1512 		rf_update_component_labels(raidPtr);
    1513 		raidPtr->numNewFailures--;
    1514 	}
    1515 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    1516 
    1517 	/* Check to see if we're at the limit... */
         	/* the mutex is held only while raidPtr->openings is tested or
         	 * decremented; it is dropped at the top of the loop body and
         	 * re-taken before every "continue" and at the bottom */
    1518 	RF_LOCK_MUTEX(raidPtr->mutex);
    1519 	while (raidPtr->openings > 0) {
    1520 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1521 
    1522 		/* get the next item, if any, from the queue */
    1523 		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
    1524 			/* nothing more to do */
         			/* (the mutex was already released above) */
    1525 			return;
    1526 		}
    1527 		BUFQ_REMOVE(&rs->buf_queue, bp);
    1528 
    1529 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
    1530 		 * partition.. Need to make it absolute to the underlying
    1531 		 * device.. */
    1532 
    1533 		blocknum = bp->b_blkno;
    1534 		if (DISKPART(bp->b_dev) != RAW_PART) {
    1535 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
    1536 			blocknum += pp->p_offset;
    1537 		}
    1538 
    1539 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
    1540 			    (int) blocknum));
    1541 
    1542 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
    1543 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
    1544 
    1545 		/* *THIS* is where we adjust what block we're going to...
    1546 		 * but DO NOT TOUCH bp->b_blkno!!! */
    1547 		raid_addr = blocknum;
    1548 
         		/* num_blocks: whole sectors in the request; pb: 1 when
         		 * b_bcount has bits set in sectorMask (presumably a
         		 * trailing partial sector -- confirm how sectorMask is
         		 * defined); sum: first sector past the request */
    1549 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
    1550 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
    1551 		sum = raid_addr + num_blocks + pb;
         		/* the "1 ||" makes this condition always true */
    1552 		if (1 || rf_debugKernelAccess) {
    1553 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
    1554 				    (int) raid_addr, (int) sum, (int) num_blocks,
    1555 				    (int) pb, (int) bp->b_resid));
    1556 		}
         		/* reject requests that end beyond the set, or whose end
         		 * computation wrapped around */
    1557 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
    1558 		    || (sum < num_blocks) || (sum < pb)) {
    1559 			bp->b_error = ENOSPC;
    1560 			bp->b_flags |= B_ERROR;
    1561 			bp->b_resid = bp->b_bcount;
    1562 			biodone(bp);
    1563 			RF_LOCK_MUTEX(raidPtr->mutex);
    1564 			continue;
    1565 		}
    1566 		/*
    1567 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
    1568 		 */
    1569 
         		/* byte counts that are not a multiple of the sector size
         		 * are refused outright */
    1570 		if (bp->b_bcount & raidPtr->sectorMask) {
    1571 			bp->b_error = EINVAL;
    1572 			bp->b_flags |= B_ERROR;
    1573 			bp->b_resid = bp->b_bcount;
    1574 			biodone(bp);
    1575 			RF_LOCK_MUTEX(raidPtr->mutex);
    1576 			continue;
    1577 
    1578 		}
    1579 		db1_printf(("Calling DoAccess..\n"));
    1580 
    1581 
         		/* claim one opening for the request about to be issued;
         		 * nothing in this function gives it back (presumably done
         		 * on completion elsewhere -- confirm) */
    1582 		RF_LOCK_MUTEX(raidPtr->mutex);
    1583 		raidPtr->openings--;
    1584 		RF_UNLOCK_MUTEX(raidPtr->mutex);
    1585 
    1586 		/*
    1587 		 * Everything is async.
    1588 		 */
    1589 		do_async = 1;
    1590 
    1591 		/* don't ever condition on bp->b_flags & B_WRITE.
    1592 		 * always condition on B_READ instead */
    1593 
    1594 		/* XXX we're still at splbio() here... do we *really*
    1595 		   need to be? */
    1596 
    1597 
         		/* note: the value stored in retcode is never examined */
    1598 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
    1599 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
    1600 				      do_async, raid_addr, num_blocks,
    1601 				      bp->b_un.b_addr, bp, NULL, NULL,
    1602 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
    1603 
    1604 
    1605 		RF_LOCK_MUTEX(raidPtr->mutex);
    1606 	}
    1607 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    1608 }
   1609 
   1610 
   1611 
   1612 
    1613 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
         /*
          * queue: the disk queue the request belongs to; it is recorded in
          *        req->queue and supplies the component vnode/dev
          *        (queue->rf_cinfo) and the owning RAID set (queue->raidPtr).
          * req:   the request to start; req->type selects NOP, READ or WRITE.
          *
          * A struct raidbuf is obtained with RAIDGETBUF() and linked back to
          * the original buf (req->bp) and to req so that KernelWakeupFunc()
          * can find them.  For READ/WRITE the embedded buf is set up with
          * InitBP() and handed to VOP_STRATEGY(); for NOP, KernelWakeupFunc()
          * is called directly.
          *
          * Always returns 0.  Panics on a unit number >= numraid or an
          * unknown req->type.
          */
    1614 
    1615 int
    1616 rf_DispatchKernelIO(queue, req)
    1617 	RF_DiskQueue_t *queue;
    1618 	RF_DiskQueueData_t *req;
    1619 {
    1620 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
    1621 	struct buf *bp;
    1622 	struct raidbuf *raidbp = NULL;
    1623 	struct raid_softc *rs;
    1624 	int     unit;
    1625 	int s;
    1626 
         	/* s is only a placeholder while the splbio()/splx() pair below
         	 * stays commented out */
    1627 	s=0;
    1628 	/* s = splbio();*/ /* want to test this */
    1629 	/* XXX along with the vnode, we also need the softc associated with
    1630 	 * this device.. */
    1631 
    1632 	req->queue = queue;
    1633 
    1634 	unit = queue->raidPtr->raidid;
    1635 
    1636 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
    1637 
         	/* guard the raid_softc[] index below */
    1638 	if (unit >= numraid) {
    1639 		printf("Invalid unit number: %d %d\n", unit, numraid);
    1640 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
    1641 	}
    1642 	rs = &raid_softc[unit];
    1643 
    1644 	/* XXX is this the right place? */
         	/* called once for every request that reaches this point,
         	 * including RF_IO_TYPE_NOP */
    1645 	disk_busy(&rs->sc_dkdev);
    1646 
    1647 	bp = req->bp;
    1648 #if 1
    1649 	/* XXX when there is a physical disk failure, someone is passing us a
    1650 	 * buffer that contains old stuff!!  Attempt to deal with this problem
    1651 	 * without taking a performance hit... (not sure where the real bug
    1652 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
    1653 
    1654 	if (bp->b_flags & B_ERROR) {
    1655 		bp->b_flags &= ~B_ERROR;
    1656 	}
    1657 	if (bp->b_error != 0) {
    1658 		bp->b_error = 0;
    1659 	}
    1660 #endif
         	/* NOTE(review): the result of RAIDGETBUF() is dereferenced without
         	 * a NULL check -- confirm that the macro cannot fail */
    1661 	raidbp = RAIDGETBUF(rs);
    1662 
    1663 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
    1664 
    1665 	/*
    1666 	 * context for raidiodone
    1667 	 */
    1668 	raidbp->rf_obp = bp;
    1669 	raidbp->req = req;
    1670 
    1671 	LIST_INIT(&raidbp->rf_buf.b_dep);
    1672 
    1673 	switch (req->type) {
    1674 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
    1675 		/* XXX need to do something extra here.. */
    1676 		/* I'm leaving this in, as I've never actually seen it used,
    1677 		 * and I'd like folks to report it... GO */
    1678 		printf(("WAKEUP CALLED\n"));
    1679 		queue->numOutstanding++;
    1680 
    1681 		/* XXX need to glue the original buffer into this??  */
    1682 
         		/* no I/O is started: the completion routine is run
         		 * directly, on a buf that InitBP() never filled in */
    1683 		KernelWakeupFunc(&raidbp->rf_buf);
    1684 		break;
    1685 
    1686 	case RF_IO_TYPE_READ:
    1687 	case RF_IO_TYPE_WRITE:
    1688 
    1689 		if (req->tracerec) {
    1690 			RF_ETIMER_START(req->tracerec->timer);
    1691 		}
         		/* the component buf gets the direction bit OR'ed with the
         		 * original buf's flags, and KernelWakeupFunc as its
         		 * completion callback */
    1692 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
    1693 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
    1694 		    req->sectorOffset, req->numSector,
    1695 		    req->buf, KernelWakeupFunc, (void *) req,
    1696 		    queue->raidPtr->logBytesPerSector, req->b_proc);
    1697 
    1698 		if (rf_debugKernelAccess) {
    1699 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
    1700 				(long) bp->b_blkno));
    1701 		}
    1702 		queue->numOutstanding++;
    1703 		queue->last_deq_sector = req->sectorOffset;
    1704 		/* acc wouldn't have been let in if there were any pending
    1705 		 * reqs at any other priority */
    1706 		queue->curPriority = req->priority;
    1707 
    1708 		db1_printf(("Going for %c to unit %d row %d col %d\n",
    1709 			req->type, unit, queue->row, queue->col));
    1710 		db1_printf(("sector %d count %d (%d bytes) %d\n",
    1711 			(int) req->sectorOffset, (int) req->numSector,
    1712 			(int) (req->numSector <<
    1713 			    queue->raidPtr->logBytesPerSector),
    1714 			(int) queue->raidPtr->logBytesPerSector));
         		/* writes are counted against the component vnode before
         		 * being issued */
    1715 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
    1716 			raidbp->rf_buf.b_vp->v_numoutput++;
    1717 		}
    1718 		VOP_STRATEGY(&raidbp->rf_buf);
    1719 
    1720 		break;
    1721 
    1722 	default:
    1723 		panic("bad req->type in rf_DispatchKernelIO");
    1724 	}
    1725 	db1_printf(("Exiting from DispatchKernelIO\n"));
    1726 	/* splx(s); */ /* want to test this */
    1727 	return (0);
    1728 }
   1729 /* this is the callback function associated with a I/O invoked from
   1730    kernel code.
   1731  */
   1732 static void
   1733 KernelWakeupFunc(vbp)
   1734 	struct buf *vbp;
   1735 {
   1736 	RF_DiskQueueData_t *req = NULL;
   1737 	RF_DiskQueue_t *queue;
   1738 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1739 	struct buf *bp;
   1740 	struct raid_softc *rs;
   1741 	int     unit;
   1742 	register int s;
   1743 
   1744 	s = splbio();
   1745 	db1_printf(("recovering the request queue:\n"));
   1746 	req = raidbp->req;
   1747 
   1748 	bp = raidbp->rf_obp;
   1749 
   1750 	queue = (RF_DiskQueue_t *) req->queue;
   1751 
   1752 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1753 		bp->b_flags |= B_ERROR;
   1754 		bp->b_error = raidbp->rf_buf.b_error ?
   1755 		    raidbp->rf_buf.b_error : EIO;
   1756 	}
   1757 
   1758 	/* XXX methinks this could be wrong... */
   1759 #if 1
   1760 	bp->b_resid = raidbp->rf_buf.b_resid;
   1761 #endif
   1762 
   1763 	if (req->tracerec) {
   1764 		RF_ETIMER_STOP(req->tracerec->timer);
   1765 		RF_ETIMER_EVAL(req->tracerec->timer);
   1766 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1767 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1768 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1769 		req->tracerec->num_phys_ios++;
   1770 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1771 	}
   1772 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1773 
   1774 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1775 
   1776 
   1777 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1778 	 * ballistic, and mark the component as hosed... */
   1779 
   1780 	if (bp->b_flags & B_ERROR) {
   1781 		/* Mark the disk as dead */
   1782 		/* but only mark it once... */
   1783 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1784 		    rf_ds_optimal) {
   1785 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1786 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1787 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1788 			    rf_ds_failed;
   1789 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1790 			queue->raidPtr->numFailures++;
   1791 			queue->raidPtr->numNewFailures++;
   1792 			/* XXX here we should bump the version number for each component, and write that data out */
   1793 		} else {	/* Disk is already dead... */
   1794 			/* printf("Disk already marked as dead!\n"); */
   1795 		}
   1796 
   1797 	}
   1798 
   1799 	rs = &raid_softc[unit];
   1800 	RAIDPUTBUF(rs, raidbp);
   1801 
   1802 
   1803 	if (bp->b_resid == 0) {
   1804 		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
   1805 		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
   1806 	}
   1807 
   1808 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1809 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1810 
   1811 	splx(s);
   1812 }
   1813 
   1814 
   1815 
   1816 /*
   1817  * initialize a buf structure for doing an I/O in the kernel.
   1818  */
   1819 static void
   1820 InitBP(
   1821     struct buf * bp,
   1822     struct vnode * b_vp,
   1823     unsigned rw_flag,
   1824     dev_t dev,
   1825     RF_SectorNum_t startSect,
   1826     RF_SectorCount_t numSect,
   1827     caddr_t buf,
   1828     void (*cbFunc) (struct buf *),
   1829     void *cbArg,
   1830     int logBytesPerSector,
   1831     struct proc * b_proc)
   1832 {
   1833 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1834 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1835 	bp->b_bcount = numSect << logBytesPerSector;
   1836 	bp->b_bufsize = bp->b_bcount;
   1837 	bp->b_error = 0;
   1838 	bp->b_dev = dev;
   1839 	bp->b_un.b_addr = buf;
   1840 	bp->b_blkno = startSect;
   1841 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1842 	if (bp->b_bcount == 0) {
   1843 		panic("bp->b_bcount is zero in InitBP!!\n");
   1844 	}
   1845 	bp->b_proc = b_proc;
   1846 	bp->b_iodone = cbFunc;
   1847 	bp->b_vp = b_vp;
   1848 
   1849 }
   1850 
   1851 static void
   1852 raidgetdefaultlabel(raidPtr, rs, lp)
   1853 	RF_Raid_t *raidPtr;
   1854 	struct raid_softc *rs;
   1855 	struct disklabel *lp;
   1856 {
   1857 	db1_printf(("Building a default label...\n"));
   1858 	bzero(lp, sizeof(*lp));
   1859 
   1860 	/* fabricate a label... */
   1861 	lp->d_secperunit = raidPtr->totalSectors;
   1862 	lp->d_secsize = raidPtr->bytesPerSector;
   1863 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1864 	lp->d_ntracks = 1;
   1865 	lp->d_ncylinders = raidPtr->totalSectors /
   1866 		(lp->d_nsectors * lp->d_ntracks);
   1867 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1868 
   1869 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1870 	lp->d_type = DTYPE_RAID;
   1871 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1872 	lp->d_rpm = 3600;
   1873 	lp->d_interleave = 1;
   1874 	lp->d_flags = 0;
   1875 
   1876 	lp->d_partitions[RAW_PART].p_offset = 0;
   1877 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1878 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1879 	lp->d_npartitions = RAW_PART + 1;
   1880 
   1881 	lp->d_magic = DISKMAGIC;
   1882 	lp->d_magic2 = DISKMAGIC;
   1883 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   1884 
   1885 }
   1886 /*
   1887  * Read the disklabel from the raid device.  If one is not present, fake one
   1888  * up.
   1889  */
   1890 static void
   1891 raidgetdisklabel(dev)
   1892 	dev_t   dev;
   1893 {
   1894 	int     unit = raidunit(dev);
   1895 	struct raid_softc *rs = &raid_softc[unit];
   1896 	char   *errstring;
   1897 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1898 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   1899 	RF_Raid_t *raidPtr;
   1900 
   1901 	db1_printf(("Getting the disklabel...\n"));
   1902 
   1903 	bzero(clp, sizeof(*clp));
   1904 
   1905 	raidPtr = raidPtrs[unit];
   1906 
   1907 	raidgetdefaultlabel(raidPtr, rs, lp);
   1908 
   1909 	/*
   1910 	 * Call the generic disklabel extraction routine.
   1911 	 */
   1912 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   1913 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   1914 	if (errstring)
   1915 		raidmakedisklabel(rs);
   1916 	else {
   1917 		int     i;
   1918 		struct partition *pp;
   1919 
   1920 		/*
   1921 		 * Sanity check whether the found disklabel is valid.
   1922 		 *
   1923 		 * This is necessary since total size of the raid device
   1924 		 * may vary when an interleave is changed even though exactly
   1925 		 * same componets are used, and old disklabel may used
   1926 		 * if that is found.
   1927 		 */
   1928 		if (lp->d_secperunit != rs->sc_size)
   1929 			printf("WARNING: %s: "
   1930 			    "total sector size in disklabel (%d) != "
   1931 			    "the size of raid (%ld)\n", rs->sc_xname,
   1932 			    lp->d_secperunit, (long) rs->sc_size);
   1933 		for (i = 0; i < lp->d_npartitions; i++) {
   1934 			pp = &lp->d_partitions[i];
   1935 			if (pp->p_offset + pp->p_size > rs->sc_size)
   1936 				printf("WARNING: %s: end of partition `%c' "
   1937 				    "exceeds the size of raid (%ld)\n",
   1938 				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
   1939 		}
   1940 	}
   1941 
   1942 }
   1943 /*
   1944  * Take care of things one might want to take care of in the event
   1945  * that a disklabel isn't present.
   1946  */
   1947 static void
   1948 raidmakedisklabel(rs)
   1949 	struct raid_softc *rs;
   1950 {
   1951 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   1952 	db1_printf(("Making a label..\n"));
   1953 
   1954 	/*
   1955 	 * For historical reasons, if there's no disklabel present
   1956 	 * the raw partition must be marked FS_BSDFFS.
   1957 	 */
   1958 
   1959 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   1960 
   1961 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   1962 
   1963 	lp->d_checksum = dkcksum(lp);
   1964 }
   1965 /*
   1966  * Lookup the provided name in the filesystem.  If the file exists,
   1967  * is a valid block device, and isn't being used by anyone else,
   1968  * set *vpp to the file's vnode.
   1969  * You'll find the original of this in ccd.c
   1970  */
   1971 int
   1972 raidlookup(path, p, vpp)
   1973 	char   *path;
   1974 	struct proc *p;
   1975 	struct vnode **vpp;	/* result */
   1976 {
   1977 	struct nameidata nd;
   1978 	struct vnode *vp;
   1979 	struct vattr va;
   1980 	int     error;
   1981 
   1982 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   1983 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   1984 #ifdef DEBUG
   1985 		printf("RAIDframe: vn_open returned %d\n", error);
   1986 #endif
   1987 		return (error);
   1988 	}
   1989 	vp = nd.ni_vp;
   1990 	if (vp->v_usecount > 1) {
   1991 		VOP_UNLOCK(vp, 0);
   1992 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1993 		return (EBUSY);
   1994 	}
   1995 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   1996 		VOP_UNLOCK(vp, 0);
   1997 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   1998 		return (error);
   1999 	}
   2000 	/* XXX: eventually we should handle VREG, too. */
   2001 	if (va.va_type != VBLK) {
   2002 		VOP_UNLOCK(vp, 0);
   2003 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2004 		return (ENOTBLK);
   2005 	}
   2006 	VOP_UNLOCK(vp, 0);
   2007 	*vpp = vp;
   2008 	return (0);
   2009 }
   2010 /*
   2011  * Wait interruptibly for an exclusive lock.
   2012  *
   2013  * XXX
   2014  * Several drivers do this; it should be abstracted and made MP-safe.
   2015  * (Hmm... where have we seen this warning before :->  GO )
   2016  */
   2017 static int
   2018 raidlock(rs)
   2019 	struct raid_softc *rs;
   2020 {
   2021 	int     error;
   2022 
   2023 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2024 		rs->sc_flags |= RAIDF_WANTED;
   2025 		if ((error =
   2026 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2027 			return (error);
   2028 	}
   2029 	rs->sc_flags |= RAIDF_LOCKED;
   2030 	return (0);
   2031 }
   2032 /*
   2033  * Unlock and wake up any waiters.
   2034  */
   2035 static void
   2036 raidunlock(rs)
   2037 	struct raid_softc *rs;
   2038 {
   2039 
   2040 	rs->sc_flags &= ~RAIDF_LOCKED;
   2041 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2042 		rs->sc_flags &= ~RAIDF_WANTED;
   2043 		wakeup(rs);
   2044 	}
   2045 }
   2046 
   2047 
/*
 * Position and length, in bytes, of the component label on each
 * component.  The label read/write routines in this file divide the
 * offset by DEV_BSIZE to get the block number and use the size for
 * both the buffer allocation and the transfer count.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2050 
   2051 int
   2052 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2053 {
   2054 	RF_ComponentLabel_t clabel;
   2055 	raidread_component_label(dev, b_vp, &clabel);
   2056 	clabel.mod_counter = mod_counter;
   2057 	clabel.clean = RF_RAID_CLEAN;
   2058 	raidwrite_component_label(dev, b_vp, &clabel);
   2059 	return(0);
   2060 }
   2061 
   2062 
   2063 int
   2064 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2065 {
   2066 	RF_ComponentLabel_t clabel;
   2067 	raidread_component_label(dev, b_vp, &clabel);
   2068 	clabel.mod_counter = mod_counter;
   2069 	clabel.clean = RF_RAID_DIRTY;
   2070 	raidwrite_component_label(dev, b_vp, &clabel);
   2071 	return(0);
   2072 }
   2073 
   2074 /* ARGSUSED */
   2075 int
   2076 raidread_component_label(dev, b_vp, clabel)
   2077 	dev_t dev;
   2078 	struct vnode *b_vp;
   2079 	RF_ComponentLabel_t *clabel;
   2080 {
   2081 	struct buf *bp;
   2082 	int error;
   2083 
   2084 	/* XXX should probably ensure that we don't try to do this if
   2085 	   someone has changed rf_protected_sectors. */
   2086 
   2087 	/* get a block of the appropriate size... */
   2088 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2089 	bp->b_dev = dev;
   2090 
   2091 	/* get our ducks in a row for the read */
   2092 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2093 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2094 	bp->b_flags = B_BUSY | B_READ;
   2095  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2096 
   2097 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2098 
   2099 	error = biowait(bp);
   2100 
   2101 	if (!error) {
   2102 		memcpy(clabel, bp->b_un.b_addr,
   2103 		       sizeof(RF_ComponentLabel_t));
   2104 #if 0
   2105 		print_component_label( clabel );
   2106 #endif
   2107         } else {
   2108 #if 0
   2109 		printf("Failed to read RAID component label!\n");
   2110 #endif
   2111 	}
   2112 
   2113         bp->b_flags = B_INVAL | B_AGE;
   2114 	brelse(bp);
   2115 	return(error);
   2116 }
   2117 /* ARGSUSED */
   2118 int
   2119 raidwrite_component_label(dev, b_vp, clabel)
   2120 	dev_t dev;
   2121 	struct vnode *b_vp;
   2122 	RF_ComponentLabel_t *clabel;
   2123 {
   2124 	struct buf *bp;
   2125 	int error;
   2126 
   2127 	/* get a block of the appropriate size... */
   2128 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2129 	bp->b_dev = dev;
   2130 
   2131 	/* get our ducks in a row for the write */
   2132 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2133 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2134 	bp->b_flags = B_BUSY | B_WRITE;
   2135  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2136 
   2137 	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
   2138 
   2139 	memcpy( bp->b_un.b_addr, clabel, sizeof(RF_ComponentLabel_t));
   2140 
   2141 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2142 	error = biowait(bp);
   2143         bp->b_flags = B_INVAL | B_AGE;
   2144 	brelse(bp);
   2145 	if (error) {
   2146 #if 1
   2147 		printf("Failed to write RAID component info!\n");
   2148 #endif
   2149 	}
   2150 
   2151 	return(error);
   2152 }
   2153 
   2154 void
   2155 rf_markalldirty( raidPtr )
   2156 	RF_Raid_t *raidPtr;
   2157 {
   2158 	RF_ComponentLabel_t clabel;
   2159 	int r,c;
   2160 
   2161 	raidPtr->mod_counter++;
   2162 	for (r = 0; r < raidPtr->numRow; r++) {
   2163 		for (c = 0; c < raidPtr->numCol; c++) {
   2164 			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
   2165 				raidread_component_label(
   2166 					raidPtr->Disks[r][c].dev,
   2167 					raidPtr->raid_cinfo[r][c].ci_vp,
   2168 					&clabel);
   2169 				if (clabel.status == rf_ds_spared) {
   2170 					/* XXX do something special...
   2171 					 but whatever you do, don't
   2172 					 try to access it!! */
   2173 				} else {
   2174 #if 0
   2175 				clabel.status =
   2176 					raidPtr->Disks[r][c].status;
   2177 				raidwrite_component_label(
   2178 					raidPtr->Disks[r][c].dev,
   2179 					raidPtr->raid_cinfo[r][c].ci_vp,
   2180 					&clabel);
   2181 #endif
   2182 				raidmarkdirty(
   2183 				       raidPtr->Disks[r][c].dev,
   2184 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2185 				       raidPtr->mod_counter);
   2186 				}
   2187 			}
   2188 		}
   2189 	}
   2190 	/* printf("Component labels marked dirty.\n"); */
   2191 #if 0
   2192 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2193 		sparecol = raidPtr->numCol + c;
   2194 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2195 			/*
   2196 
   2197 			   XXX this is where we get fancy and map this spare
   2198 			   into it's correct spot in the array.
   2199 
   2200 			 */
   2201 			/*
   2202 
   2203 			   we claim this disk is "optimal" if it's
   2204 			   rf_ds_used_spare, as that means it should be
   2205 			   directly substitutable for the disk it replaced.
   2206 			   We note that too...
   2207 
   2208 			 */
   2209 
   2210 			for(i=0;i<raidPtr->numRow;i++) {
   2211 				for(j=0;j<raidPtr->numCol;j++) {
   2212 					if ((raidPtr->Disks[i][j].spareRow ==
   2213 					     r) &&
   2214 					    (raidPtr->Disks[i][j].spareCol ==
   2215 					     sparecol)) {
   2216 						srow = r;
   2217 						scol = sparecol;
   2218 						break;
   2219 					}
   2220 				}
   2221 			}
   2222 
   2223 			raidread_component_label(
   2224 				      raidPtr->Disks[r][sparecol].dev,
   2225 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2226 				      &clabel);
   2227 			/* make sure status is noted */
   2228 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2229 			clabel.mod_counter = raidPtr->mod_counter;
   2230 			clabel.serial_number = raidPtr->serial_number;
   2231 			clabel.row = srow;
   2232 			clabel.column = scol;
   2233 			clabel.num_rows = raidPtr->numRow;
   2234 			clabel.num_columns = raidPtr->numCol;
   2235 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2236 			clabel.status = rf_ds_optimal;
   2237 			raidwrite_component_label(
   2238 				      raidPtr->Disks[r][sparecol].dev,
   2239 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2240 				      &clabel);
   2241 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2242 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2243 		}
   2244 	}
   2245 
   2246 #endif
   2247 }
   2248 
   2249 
   2250 void
   2251 rf_update_component_labels( raidPtr )
   2252 	RF_Raid_t *raidPtr;
   2253 {
   2254 	RF_ComponentLabel_t clabel;
   2255 	int sparecol;
   2256 	int r,c;
   2257 	int i,j;
   2258 	int srow, scol;
   2259 
   2260 	srow = -1;
   2261 	scol = -1;
   2262 
   2263 	/* XXX should do extra checks to make sure things really are clean,
   2264 	   rather than blindly setting the clean bit... */
   2265 
   2266 	raidPtr->mod_counter++;
   2267 
   2268 	for (r = 0; r < raidPtr->numRow; r++) {
   2269 		for (c = 0; c < raidPtr->numCol; c++) {
   2270 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2271 				raidread_component_label(
   2272 					raidPtr->Disks[r][c].dev,
   2273 					raidPtr->raid_cinfo[r][c].ci_vp,
   2274 					&clabel);
   2275 				/* make sure status is noted */
   2276 				clabel.status = rf_ds_optimal;
   2277 				/* bump the counter */
   2278 				clabel.mod_counter = raidPtr->mod_counter;
   2279 
   2280 				raidwrite_component_label(
   2281 					raidPtr->Disks[r][c].dev,
   2282 					raidPtr->raid_cinfo[r][c].ci_vp,
   2283 					&clabel);
   2284 			}
   2285 			/* else we don't touch it.. */
   2286 		}
   2287 	}
   2288 
   2289 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2290 		sparecol = raidPtr->numCol + c;
   2291 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2292 			/*
   2293 
   2294 			   we claim this disk is "optimal" if it's
   2295 			   rf_ds_used_spare, as that means it should be
   2296 			   directly substitutable for the disk it replaced.
   2297 			   We note that too...
   2298 
   2299 			 */
   2300 
   2301 			for(i=0;i<raidPtr->numRow;i++) {
   2302 				for(j=0;j<raidPtr->numCol;j++) {
   2303 					if ((raidPtr->Disks[i][j].spareRow ==
   2304 					     0) &&
   2305 					    (raidPtr->Disks[i][j].spareCol ==
   2306 					     sparecol)) {
   2307 						srow = i;
   2308 						scol = j;
   2309 						break;
   2310 					}
   2311 				}
   2312 			}
   2313 
   2314 			/* XXX shouldn't *really* need this... */
   2315 			raidread_component_label(
   2316 				      raidPtr->Disks[0][sparecol].dev,
   2317 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2318 				      &clabel);
   2319 			/* make sure status is noted */
   2320 
   2321 			raid_init_component_label(raidPtr, &clabel);
   2322 
   2323 			clabel.mod_counter = raidPtr->mod_counter;
   2324 			clabel.row = srow;
   2325 			clabel.column = scol;
   2326 			clabel.status = rf_ds_optimal;
   2327 
   2328 			raidwrite_component_label(
   2329 				      raidPtr->Disks[0][sparecol].dev,
   2330 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2331 				      &clabel);
   2332 		}
   2333 	}
   2334 	/* 	printf("Component labels updated\n"); */
   2335 }
   2336 
   2337 
   2338 void
   2339 rf_final_update_component_labels( raidPtr )
   2340 	RF_Raid_t *raidPtr;
   2341 {
   2342 	RF_ComponentLabel_t clabel;
   2343 	int sparecol;
   2344 	int r,c;
   2345 	int i,j;
   2346 	int srow, scol;
   2347 
   2348 	srow = -1;
   2349 	scol = -1;
   2350 
   2351 	/* XXX should do extra checks to make sure things really are clean,
   2352 	   rather than blindly setting the clean bit... */
   2353 
   2354 	raidPtr->mod_counter++;
   2355 
   2356 	for (r = 0; r < raidPtr->numRow; r++) {
   2357 		for (c = 0; c < raidPtr->numCol; c++) {
   2358 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2359 				raidread_component_label(
   2360 					raidPtr->Disks[r][c].dev,
   2361 					raidPtr->raid_cinfo[r][c].ci_vp,
   2362 					&clabel);
   2363 				/* make sure status is noted */
   2364 				clabel.status = rf_ds_optimal;
   2365 				/* bump the counter */
   2366 				clabel.mod_counter = raidPtr->mod_counter;
   2367 
   2368 				raidwrite_component_label(
   2369 					raidPtr->Disks[r][c].dev,
   2370 					raidPtr->raid_cinfo[r][c].ci_vp,
   2371 					&clabel);
   2372 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2373 					raidmarkclean(
   2374 					      raidPtr->Disks[r][c].dev,
   2375 					      raidPtr->raid_cinfo[r][c].ci_vp,
   2376 					      raidPtr->mod_counter);
   2377 				}
   2378 			}
   2379 			/* else we don't touch it.. */
   2380 		}
   2381 	}
   2382 
   2383 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2384 		sparecol = raidPtr->numCol + c;
   2385 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2386 			/*
   2387 
   2388 			   we claim this disk is "optimal" if it's
   2389 			   rf_ds_used_spare, as that means it should be
   2390 			   directly substitutable for the disk it replaced.
   2391 			   We note that too...
   2392 
   2393 			 */
   2394 
   2395 			for(i=0;i<raidPtr->numRow;i++) {
   2396 				for(j=0;j<raidPtr->numCol;j++) {
   2397 					if ((raidPtr->Disks[i][j].spareRow ==
   2398 					     0) &&
   2399 					    (raidPtr->Disks[i][j].spareCol ==
   2400 					     sparecol)) {
   2401 						srow = i;
   2402 						scol = j;
   2403 						break;
   2404 					}
   2405 				}
   2406 			}
   2407 
   2408 			/* XXX shouldn't *really* need this... */
   2409 			raidread_component_label(
   2410 				      raidPtr->Disks[0][sparecol].dev,
   2411 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2412 				      &clabel);
   2413 			/* make sure status is noted */
   2414 
   2415 			raid_init_component_label(raidPtr, &clabel);
   2416 
   2417 			clabel.mod_counter = raidPtr->mod_counter;
   2418 			clabel.row = srow;
   2419 			clabel.column = scol;
   2420 			clabel.status = rf_ds_optimal;
   2421 
   2422 			raidwrite_component_label(
   2423 				      raidPtr->Disks[0][sparecol].dev,
   2424 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2425 				      &clabel);
   2426 			if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2427 				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2428 			              raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2429 					       raidPtr->mod_counter);
   2430 			}
   2431 		}
   2432 	}
   2433 	/* 	printf("Component labels updated\n"); */
   2434 }
   2435 
   2436 
   2437 void
   2438 rf_ReconThread(req)
   2439 	struct rf_recon_req *req;
   2440 {
   2441 	int     s;
   2442 	RF_Raid_t *raidPtr;
   2443 
   2444 	s = splbio();
   2445 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2446 	raidPtr->recon_in_progress = 1;
   2447 
   2448 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2449 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2450 
   2451 	/* XXX get rid of this! we don't need it at all.. */
   2452 	RF_Free(req, sizeof(*req));
   2453 
   2454 	raidPtr->recon_in_progress = 0;
   2455 	splx(s);
   2456 
   2457 	/* That's all... */
   2458 	kthread_exit(0);        /* does not return */
   2459 }
   2460 
   2461 void
   2462 rf_RewriteParityThread(raidPtr)
   2463 	RF_Raid_t *raidPtr;
   2464 {
   2465 	int retcode;
   2466 	int s;
   2467 
   2468 	raidPtr->parity_rewrite_in_progress = 1;
   2469 	s = splbio();
   2470 	retcode = rf_RewriteParity(raidPtr);
   2471 	splx(s);
   2472 	if (retcode) {
   2473 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2474 	} else {
   2475 		/* set the clean bit!  If we shutdown correctly,
   2476 		   the clean bit on each component label will get
   2477 		   set */
   2478 		raidPtr->parity_good = RF_RAID_CLEAN;
   2479 	}
   2480 	raidPtr->parity_rewrite_in_progress = 0;
   2481 
   2482 	/* That's all... */
   2483 	kthread_exit(0);        /* does not return */
   2484 }
   2485 
   2486 
/*
 * Kernel thread body: run rf_CopybackReconstructedData() on the RAID
 * set at splbio, with copyback_in_progress set for the duration, and
 * then exit the thread.
 */
void
rf_CopybackThread(raidPtr)
	RF_Raid_t *raidPtr;
{
	int s;

	/* Flag is raised before, and dropped after, the splbio section. */
	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);        /* does not return */
}
   2502 
   2503 
   2504 void
   2505 rf_ReconstructInPlaceThread(req)
   2506 	struct rf_recon_req *req;
   2507 {
   2508 	int retcode;
   2509 	int s;
   2510 	RF_Raid_t *raidPtr;
   2511 
   2512 	s = splbio();
   2513 	raidPtr = req->raidPtr;
   2514 	raidPtr->recon_in_progress = 1;
   2515 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2516 	RF_Free(req, sizeof(*req));
   2517 	raidPtr->recon_in_progress = 0;
   2518 	splx(s);
   2519 
   2520 	/* That's all... */
   2521 	kthread_exit(0);        /* does not return */
   2522 }
   2523 
/*
 * Mount-root hook for RAID devices.  Currently a placeholder: it
 * takes the device and does nothing with it.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{

}
   2530 
   2531 
   2532 RF_AutoConfig_t *
   2533 rf_find_raid_components()
   2534 {
   2535 	struct devnametobdevmaj *dtobdm;
   2536 	struct vnode *vp;
   2537 	struct disklabel label;
   2538 	struct device *dv;
   2539 	char *cd_name;
   2540 	dev_t dev;
   2541 	int error;
   2542 	int i;
   2543 	int good_one;
   2544 	RF_ComponentLabel_t *clabel;
   2545 	RF_AutoConfig_t *ac_list;
   2546 	RF_AutoConfig_t *ac;
   2547 
   2548 
   2549 	/* initialize the AutoConfig list */
   2550 	ac_list = NULL;
   2551 
   2552 if (raidautoconfig) {
   2553 
   2554 	/* we begin by trolling through *all* the devices on the system */
   2555 
   2556 	for (dv = alldevs.tqh_first; dv != NULL;
   2557 	     dv = dv->dv_list.tqe_next) {
   2558 
   2559 		/* we are only interested in disks... */
   2560 		if (dv->dv_class != DV_DISK)
   2561 			continue;
   2562 
   2563 		/* we don't care about floppies... */
   2564 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2565 			continue;
   2566 		}
   2567 
   2568 		/* need to find the device_name_to_block_device_major stuff */
   2569 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2570 		dtobdm = dev_name2blk;
   2571 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2572 			dtobdm++;
   2573 		}
   2574 
   2575 		/* get a vnode for the raw partition of this disk */
   2576 
   2577 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2578 		if (bdevvp(dev, &vp))
   2579 			panic("RAID can't alloc vnode");
   2580 
   2581 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2582 
   2583 		if (error) {
   2584 			/* "Who cares."  Continue looking
   2585 			   for something that exists*/
   2586 			vput(vp);
   2587 			continue;
   2588 		}
   2589 
   2590 		/* Ok, the disk exists.  Go get the disklabel. */
   2591 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2592 				  FREAD, NOCRED, 0);
   2593 		if (error) {
   2594 			/*
   2595 			 * XXX can't happen - open() would
   2596 			 * have errored out (or faked up one)
   2597 			 */
   2598 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2599 			       dv->dv_xname, 'a' + RAW_PART, error);
   2600 		}
   2601 
   2602 		/* don't need this any more.  We'll allocate it again
   2603 		   a little later if we really do... */
   2604 		VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2605 		vput(vp);
   2606 
   2607 		for (i=0; i < label.d_npartitions; i++) {
   2608 			/* We only support partitions marked as RAID */
   2609 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2610 				continue;
   2611 
   2612 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2613 			if (bdevvp(dev, &vp))
   2614 				panic("RAID can't alloc vnode");
   2615 
   2616 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2617 			if (error) {
   2618 				/* Whatever... */
   2619 				vput(vp);
   2620 				continue;
   2621 			}
   2622 
   2623 			good_one = 0;
   2624 
   2625 			clabel = (RF_ComponentLabel_t *)
   2626 				malloc(sizeof(RF_ComponentLabel_t),
   2627 				       M_RAIDFRAME, M_NOWAIT);
   2628 			if (clabel == NULL) {
   2629 				/* XXX CLEANUP HERE */
   2630 				printf("RAID auto config: out of memory!\n");
   2631 				return(NULL); /* XXX probably should panic? */
   2632 			}
   2633 
   2634 			if (!raidread_component_label(dev, vp, clabel)) {
   2635 				/* Got the label.  Does it look reasonable? */
   2636 				if (rf_reasonable_label(clabel) &&
   2637 				    (clabel->partitionSize <=
   2638 				     label.d_partitions[i].p_size)) {
   2639 #if DEBUG
   2640 					printf("Component on: %s%c: %d\n",
   2641 					       dv->dv_xname, 'a'+i,
   2642 					       label.d_partitions[i].p_size);
   2643 					print_component_label(clabel);
   2644 #endif
   2645 					/* if it's reasonable, add it,
   2646 					   else ignore it. */
   2647 					ac = (RF_AutoConfig_t *)
   2648 						malloc(sizeof(RF_AutoConfig_t),
   2649 						       M_RAIDFRAME,
   2650 						       M_NOWAIT);
   2651 					if (ac == NULL) {
   2652 						/* XXX should panic?? */
   2653 						return(NULL);
   2654 					}
   2655 
   2656 					sprintf(ac->devname, "%s%c",
   2657 						dv->dv_xname, 'a'+i);
   2658 					ac->dev = dev;
   2659 					ac->vp = vp;
   2660 					ac->clabel = clabel;
   2661 					ac->next = ac_list;
   2662 					ac_list = ac;
   2663 					good_one = 1;
   2664 				}
   2665 			}
   2666 			if (!good_one) {
   2667 				/* cleanup */
   2668 				free(clabel, M_RAIDFRAME);
   2669 				VOP_CLOSE(vp, FREAD, NOCRED, 0);
   2670 				vput(vp);
   2671 			}
   2672 		}
   2673 	}
   2674 }
   2675 return(ac_list);
   2676 }
   2677 
   2678 static int
   2679 rf_reasonable_label(clabel)
   2680 	RF_ComponentLabel_t *clabel;
   2681 {
   2682 
   2683 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2684 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2685 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2686 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2687 	    clabel->row >=0 &&
   2688 	    clabel->column >= 0 &&
   2689 	    clabel->num_rows > 0 &&
   2690 	    clabel->num_columns > 0 &&
   2691 	    clabel->row < clabel->num_rows &&
   2692 	    clabel->column < clabel->num_columns &&
   2693 	    clabel->blockSize > 0 &&
   2694 	    clabel->numBlocks > 0) {
   2695 		/* label looks reasonable enough... */
   2696 		return(1);
   2697 	}
   2698 	return(0);
   2699 }
   2700 
   2701 
   2702 void
   2703 print_component_label(clabel)
   2704 	RF_ComponentLabel_t *clabel;
   2705 {
   2706 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2707 	       clabel->row, clabel->column,
   2708 	       clabel->num_rows, clabel->num_columns);
   2709 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2710 	       clabel->version, clabel->serial_number,
   2711 	       clabel->mod_counter);
   2712 	printf("   Clean: %s Status: %d\n",
   2713 	       clabel->clean ? "Yes" : "No", clabel->status );
   2714 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2715 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2716 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2717 	       (char) clabel->parityConfig, clabel->blockSize,
   2718 	       clabel->numBlocks);
   2719 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2720 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2721 #if 0
   2722 	   printf("   Config order: %d\n", clabel->config_order);
   2723 #endif
   2724 
   2725 }
   2726 
   2727 RF_ConfigSet_t *
   2728 rf_create_auto_sets(ac_list)
   2729 	RF_AutoConfig_t *ac_list;
   2730 {
   2731 	RF_AutoConfig_t *ac;
   2732 	RF_ConfigSet_t *config_sets;
   2733 	RF_ConfigSet_t *cset;
   2734 	RF_AutoConfig_t *ac_next;
   2735 
   2736 
   2737 	config_sets = NULL;
   2738 
   2739 	/* Go through the AutoConfig list, and figure out which components
   2740 	   belong to what sets.  */
   2741 	ac = ac_list;
   2742 	while(ac!=NULL) {
   2743 		/* we're going to putz with ac->next, so save it here
   2744 		   for use at the end of the loop */
   2745 		ac_next = ac->next;
   2746 
   2747 		if (config_sets == NULL) {
   2748 			/* will need at least this one... */
   2749 			config_sets = (RF_ConfigSet_t *)
   2750 				malloc(sizeof(RF_ConfigSet_t),
   2751 				       M_RAIDFRAME, M_NOWAIT);
   2752 			if (config_sets == NULL) {
   2753 				panic("rf_create_auto_sets: No memory!\n");
   2754 			}
   2755 			/* this one is easy :) */
   2756 			config_sets->ac = ac;
   2757 			config_sets->next = NULL;
   2758 			config_sets->rootable = 0;
   2759 			ac->next = NULL;
   2760 		} else {
   2761 			/* which set does this component fit into? */
   2762 			cset = config_sets;
   2763 			while(cset!=NULL) {
   2764 				if (rf_does_it_fit(cset, ac)) {
   2765 					/* looks like it matches */
   2766 					ac->next = cset->ac;
   2767 					cset->ac = ac;
   2768 					break;
   2769 				}
   2770 				cset = cset->next;
   2771 			}
   2772 			if (cset==NULL) {
   2773 				/* didn't find a match above... new set..*/
   2774 				cset = (RF_ConfigSet_t *)
   2775 					malloc(sizeof(RF_ConfigSet_t),
   2776 					       M_RAIDFRAME, M_NOWAIT);
   2777 				if (cset == NULL) {
   2778 					panic("rf_create_auto_sets: No memory!\n");
   2779 				}
   2780 				cset->ac = ac;
   2781 				ac->next = NULL;
   2782 				cset->next = config_sets;
   2783 				cset->rootable = 0;
   2784 				config_sets = cset;
   2785 			}
   2786 		}
   2787 		ac = ac_next;
   2788 	}
   2789 
   2790 
   2791 	return(config_sets);
   2792 }
   2793 
   2794 static int
   2795 rf_does_it_fit(cset, ac)
   2796 	RF_ConfigSet_t *cset;
   2797 	RF_AutoConfig_t *ac;
   2798 {
   2799 	RF_ComponentLabel_t *clabel1, *clabel2;
   2800 
   2801 	/* If this one matches the *first* one in the set, that's good
   2802 	   enough, since the other members of the set would have been
   2803 	   through here too... */
   2804 	/* note that we are not checking partitionSize here..
   2805 
   2806 	   Note that we are also not checking the mod_counters here.
   2807 	   If everything else matches execpt the mod_counter, that's
   2808 	   good enough for this test.  We will deal with the mod_counters
   2809 	   a little later in the autoconfiguration process.
   2810 
   2811 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2812 
   2813 	*/
   2814 
   2815 	clabel1 = cset->ac->clabel;
   2816 	clabel2 = ac->clabel;
   2817 	if ((clabel1->version == clabel2->version) &&
   2818 	    (clabel1->serial_number == clabel2->serial_number) &&
   2819 	    (clabel1->num_rows == clabel2->num_rows) &&
   2820 	    (clabel1->num_columns == clabel2->num_columns) &&
   2821 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2822 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2823 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2824 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2825 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2826 	    (clabel1->blockSize == clabel2->blockSize) &&
   2827 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2828 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2829 	    (clabel1->root_partition == clabel2->root_partition) &&
   2830 	    (clabel1->last_unit == clabel2->last_unit) &&
   2831 	    (clabel1->config_order == clabel2->config_order)) {
   2832 		/* if it get's here, it almost *has* to be a match */
   2833 	} else {
   2834 		/* it's not consistent with somebody in the set..
   2835 		   punt */
   2836 		return(0);
   2837 	}
   2838 	/* all was fine.. it must fit... */
   2839 	return(1);
   2840 }
   2841 
   2842 int
   2843 rf_have_enough_components(cset)
   2844 	RF_ConfigSet_t *cset;
   2845 {
   2846 	RF_AutoConfig_t *ac;
   2847 	RF_AutoConfig_t *auto_config;
   2848 	RF_ComponentLabel_t *clabel;
   2849 	int r,c;
   2850 	int num_rows;
   2851 	int num_cols;
   2852 	int num_missing;
   2853 
   2854 	/* check to see that we have enough 'live' components
   2855 	   of this set.  If so, we can configure it if necessary */
   2856 
   2857 	num_rows = cset->ac->clabel->num_rows;
   2858 	num_cols = cset->ac->clabel->num_columns;
   2859 
   2860 	/* XXX Check for duplicate components!?!?!? */
   2861 
   2862 	num_missing = 0;
   2863 	auto_config = cset->ac;
   2864 
   2865 	for(r=0; r<num_rows; r++) {
   2866 		for(c=0; c<num_cols; c++) {
   2867 			ac = auto_config;
   2868 			while(ac!=NULL) {
   2869 				if (ac->clabel==NULL) {
   2870 					/* big-time bad news. */
   2871 					goto fail;
   2872 				}
   2873 				if ((ac->clabel->row == r) &&
   2874 				    (ac->clabel->column == c)) {
   2875 					/* it's this one... */
   2876 #if DEBUG
   2877 					printf("Found: %s at %d,%d\n",
   2878 					       ac->devname,r,c);
   2879 #endif
   2880 					break;
   2881 				}
   2882 				ac=ac->next;
   2883 			}
   2884 			if (ac==NULL) {
   2885 				/* Didn't find one here! */
   2886 				num_missing++;
   2887 			}
   2888 		}
   2889 	}
   2890 
   2891 	clabel = cset->ac->clabel;
   2892 
   2893 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   2894 	    ((clabel->parityConfig == '1') && (num_missing > 1)) ||
   2895 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   2896 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   2897 		/* XXX this needs to be made *much* more general */
   2898 		/* Too many failures */
   2899 		return(0);
   2900 	}
   2901 	/* otherwise, all is well, and we've got enough to take a kick
   2902 	   at autoconfiguring this set */
   2903 	return(1);
   2904 fail:
   2905 	return(0);
   2906 
   2907 }
   2908 
   2909 void
   2910 rf_create_configuration(ac,config,raidPtr)
   2911 	RF_AutoConfig_t *ac;
   2912 	RF_Config_t *config;
   2913 	RF_Raid_t *raidPtr;
   2914 {
   2915 	RF_ComponentLabel_t *clabel;
   2916 
   2917 	clabel = ac->clabel;
   2918 
   2919 	/* 1. Fill in the common stuff */
   2920 	config->numRow = clabel->num_rows;
   2921 	config->numCol = clabel->num_columns;
   2922 	config->numSpare = 0; /* XXX should this be set here? */
   2923 	config->sectPerSU = clabel->sectPerSU;
   2924 	config->SUsPerPU = clabel->SUsPerPU;
   2925 	config->SUsPerRU = clabel->SUsPerRU;
   2926 	config->parityConfig = clabel->parityConfig;
   2927 	/* XXX... */
   2928 	strcpy(config->diskQueueType,"fifo");
   2929 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   2930 	config->layoutSpecificSize = 0; /* XXX ?? */
   2931 
   2932 	while(ac!=NULL) {
   2933 		/* row/col values will be in range due to the checks
   2934 		   in reasonable_label() */
   2935 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   2936 		       ac->devname);
   2937 		ac = ac->next;
   2938 	}
   2939 
   2940 }
   2941 
   2942 int
   2943 rf_set_autoconfig(raidPtr, new_value)
   2944 	RF_Raid_t *raidPtr;
   2945 	int new_value;
   2946 {
   2947 	RF_ComponentLabel_t clabel;
   2948 	struct vnode *vp;
   2949 	dev_t dev;
   2950 	int row, column;
   2951 
   2952 	raidPtr->autoconfigure = new_value;
   2953 	for(row=0; row<raidPtr->numRow; row++) {
   2954 		for(column=0; column<raidPtr->numCol; column++) {
   2955 			dev = raidPtr->Disks[row][column].dev;
   2956 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2957 			raidread_component_label(dev, vp, &clabel);
   2958 			clabel.autoconfigure = new_value;
   2959 			raidwrite_component_label(dev, vp, &clabel);
   2960 		}
   2961 	}
   2962 	return(new_value);
   2963 }
   2964 
   2965 int
   2966 rf_set_rootpartition(raidPtr, new_value)
   2967 	RF_Raid_t *raidPtr;
   2968 	int new_value;
   2969 {
   2970 	RF_ComponentLabel_t clabel;
   2971 	struct vnode *vp;
   2972 	dev_t dev;
   2973 	int row, column;
   2974 
   2975 	raidPtr->root_partition = new_value;
   2976 	for(row=0; row<raidPtr->numRow; row++) {
   2977 		for(column=0; column<raidPtr->numCol; column++) {
   2978 			dev = raidPtr->Disks[row][column].dev;
   2979 			vp = raidPtr->raid_cinfo[row][column].ci_vp;
   2980 			raidread_component_label(dev, vp, &clabel);
   2981 			clabel.root_partition = new_value;
   2982 			raidwrite_component_label(dev, vp, &clabel);
   2983 		}
   2984 	}
   2985 	return(new_value);
   2986 }
   2987 
   2988 void
   2989 rf_release_all_vps(cset)
   2990 	RF_ConfigSet_t *cset;
   2991 {
   2992 	RF_AutoConfig_t *ac;
   2993 
   2994 	ac = cset->ac;
   2995 	while(ac!=NULL) {
   2996 		/* Close the vp, and give it back */
   2997 		if (ac->vp) {
   2998 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   2999 			vput(ac->vp);
   3000 		}
   3001 		ac = ac->next;
   3002 	}
   3003 }
   3004 
   3005 
   3006 void
   3007 rf_cleanup_config_set(cset)
   3008 	RF_ConfigSet_t *cset;
   3009 {
   3010 	RF_AutoConfig_t *ac;
   3011 	RF_AutoConfig_t *next_ac;
   3012 
   3013 	ac = cset->ac;
   3014 	while(ac!=NULL) {
   3015 		next_ac = ac->next;
   3016 		/* nuke the label */
   3017 		free(ac->clabel, M_RAIDFRAME);
   3018 		/* cleanup the config structure */
   3019 		free(ac, M_RAIDFRAME);
   3020 		/* "next.." */
   3021 		ac = next_ac;
   3022 	}
   3023 	/* and, finally, nuke the config set */
   3024 	free(cset, M_RAIDFRAME);
   3025 }
   3026 
   3027 
   3028 void
   3029 raid_init_component_label(raidPtr, clabel)
   3030 	RF_Raid_t *raidPtr;
   3031 	RF_ComponentLabel_t *clabel;
   3032 {
   3033 	/* current version number */
   3034 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3035 	clabel->serial_number = raidPtr->serial_number;
   3036 	clabel->mod_counter = raidPtr->mod_counter;
   3037 	clabel->num_rows = raidPtr->numRow;
   3038 	clabel->num_columns = raidPtr->numCol;
   3039 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3040 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3041 
   3042 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3043 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3044 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3045 
   3046 	clabel->blockSize = raidPtr->bytesPerSector;
   3047 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3048 
   3049 	/* XXX not portable */
   3050 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3051 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3052 	clabel->autoconfigure = raidPtr->autoconfigure;
   3053 	clabel->root_partition = raidPtr->root_partition;
   3054 	clabel->last_unit = raidPtr->raidid;
   3055 	clabel->config_order = raidPtr->config_order;
   3056 }
   3057 
   3058 int
   3059 rf_auto_config_set(cset,unit)
   3060 	RF_ConfigSet_t *cset;
   3061 	int *unit;
   3062 {
   3063 	RF_Raid_t *raidPtr;
   3064 	RF_Config_t *config;
   3065 	int raidID;
   3066 	int retcode;
   3067 
   3068 	printf("Starting autoconfigure on raid%d\n",raidID);
   3069 
   3070 	retcode = 0;
   3071 	*unit = -1;
   3072 
   3073 	/* 1. Create a config structure */
   3074 
   3075 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3076 				       M_RAIDFRAME,
   3077 				       M_NOWAIT);
   3078 	if (config==NULL) {
   3079 		printf("Out of mem!?!?\n");
   3080 				/* XXX do something more intelligent here. */
   3081 		return(1);
   3082 	}
   3083 	/* XXX raidID needs to be set correctly.. */
   3084 
   3085 	/*
   3086 	   2. Figure out what RAID ID this one is supposed to live at
   3087 	   See if we can get the same RAID dev that it was configured
   3088 	   on last time..
   3089 	*/
   3090 
   3091 	raidID = cset->ac->clabel->last_unit;
   3092 	if ((raidID < 0) || (raidID >= numraid)) {
   3093 		/* let's not wander off into lala land. */
   3094 		raidID = numraid - 1;
   3095 	}
   3096 	if (raidPtrs[raidID]->valid != 0) {
   3097 
   3098 		/*
   3099 		   Nope... Go looking for an alternative...
   3100 		   Start high so we don't immediately use raid0 if that's
   3101 		   not taken.
   3102 		*/
   3103 
   3104 		for(raidID = numraid; raidID >= 0; raidID--) {
   3105 			if (raidPtrs[raidID]->valid == 0) {
   3106 				/* can use this one! */
   3107 				break;
   3108 			}
   3109 		}
   3110 	}
   3111 
   3112 	if (raidID < 0) {
   3113 		/* punt... */
   3114 		printf("Unable to auto configure this set!\n");
   3115 		printf("(Out of RAID devs!)\n");
   3116 		return(1);
   3117 	}
   3118 
   3119 	raidPtr = raidPtrs[raidID];
   3120 
   3121 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3122 	raidPtr->raidid = raidID;
   3123 	raidPtr->openings = RAIDOUTSTANDING;
   3124 
   3125 	/* 3. Build the configuration structure */
   3126 	rf_create_configuration(cset->ac, config, raidPtr);
   3127 
   3128 	/* 4. Do the configuration */
   3129 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3130 
   3131 	if (retcode == 0) {
   3132 
   3133 		raidinit(raidPtrs[raidID]);
   3134 
   3135 		rf_markalldirty(raidPtrs[raidID]);
   3136 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3137 		if (cset->ac->clabel->root_partition==1) {
   3138 			/* everything configured just fine.  Make a note
   3139 			   that this set is eligible to be root. */
   3140 			cset->rootable = 1;
   3141 			/* XXX do this here? */
   3142 			raidPtrs[raidID]->root_partition = 1;
   3143 		}
   3144 	}
   3145 
   3146 	/* 5. Cleanup */
   3147 	free(config, M_RAIDFRAME);
   3148 
   3149 	*unit = raidID;
   3150 	return(retcode);
   3151 }
   3152