Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.137
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.137 2002/09/23 03:17:36 itojun Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.137 2002/09/23 03:17:36 itojun Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/queue.h>
    123 #include <sys/disk.h>
    124 #include <sys/device.h>
    125 #include <sys/stat.h>
    126 #include <sys/ioctl.h>
    127 #include <sys/fcntl.h>
    128 #include <sys/systm.h>
    129 #include <sys/namei.h>
    130 #include <sys/vnode.h>
    131 #include <sys/disklabel.h>
    132 #include <sys/conf.h>
    133 #include <sys/lock.h>
    134 #include <sys/buf.h>
    135 #include <sys/user.h>
    136 #include <sys/reboot.h>
    137 
    138 #include <dev/raidframe/raidframevar.h>
    139 #include <dev/raidframe/raidframeio.h>
    140 #include "raid.h"
    141 #include "opt_raid_autoconfig.h"
    142 #include "rf_raid.h"
    143 #include "rf_copyback.h"
    144 #include "rf_dag.h"
    145 #include "rf_dagflags.h"
    146 #include "rf_desc.h"
    147 #include "rf_diskqueue.h"
    148 #include "rf_etimer.h"
    149 #include "rf_general.h"
    150 #include "rf_kintf.h"
    151 #include "rf_options.h"
    152 #include "rf_driver.h"
    153 #include "rf_parityscan.h"
    154 #include "rf_threadstuff.h"
    155 
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list.  With
 * DEBUG defined the message is printed only when rf_kdebug_level > 0;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement: it can be used as the unbraced body of an
 * if/else without capturing a following `else' (DEBUG form) or producing
 * a stray empty statement that detaches the `else' (non-DEBUG form).
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    162 
/*
 * Table of pointers to the per-unit RAID descriptors, indexed by unit
 * number.  The table and the descriptors it points to are allocated in
 * raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialized in raidattach() via rf_mutex_init().
 * NOTE(review): presumably protects the two spare-table queues declared
 * below -- confirm against the code that uses them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queues start out empty (set to NULL in raidattach()). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

    171 
/* prototypes for routines that are private to this file */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

/* attach routine; the argument is the number of RAID units requested */
void raidattach(int);
    182 
/* Declarations of the driver entry points referenced by the tables below. */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/*
 * Block device switch entry.  Shares raidopen/raidclose/raidioctl with
 * the character device entry below.
 */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/*
 * Character device switch entry.  raidread/raidwrite are the raw
 * read/write entry points; the stop, tty, poll and mmap slots are filled
 * with the generic no* stubs.
 */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, D_DISK
};
    201 
/*
 * Pilfered from ccd.c
 *
 * Wrapper around the struct buf used for I/O to a component, carrying a
 * pointer back to the original buf and to the RAIDframe request.
 * NOTE(review): rf_buf presumably has to be the first member so that a
 * struct buf pointer can be converted back to its enclosing struct
 * raidbuf -- confirm against the completion path.
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/*
 * component buffer pool; initialized in raidattach() with an item size
 * of sizeof(struct raidbuf)
 */
struct pool raidframe_cbufpool;
    214 
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Per-unit software state.  One entry per unit lives in the raid_softc[]
 * array, which is allocated and zeroed in raidattach() and indexed by
 * unit number.
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* unit number encoded in a dev_t */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units; set by raidattach().  The entry points reject
 * any unit number >= numraid.
 */
int numraid = 0;
    236 
    237 /*
    238  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    239  * Be aware that large numbers can allow the driver to consume a lot of
    240  * kernel memory, especially on writes, and in degraded mode reads.
    241  *
    242  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    243  * a single 64K write will typically require 64K for the old data,
    244  * 64K for the old parity, and 64K for the new parity, for a total
    245  * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
    247  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    248  *
    249  * Now in degraded mode, for example, a 64K read on the above setup may
    250  * require data reconstruction, which will require *all* of the 4 remaining
    251  * disks to participate -- 4 * 32K/disk == 128K again.
    252  */
    253 
/*
 * Default number of simultaneous I/Os per RAID device.  The #ifndef lets
 * a definition supplied earlier (e.g. by the build) take precedence.
 * The value is stored in raidPtr->openings when a set is configured.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/*
 * Build the dev_t of the raw partition (RAW_PART) for the unit that
 * `dev' belongs to, keeping the same major number.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    260 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of per-unit softc structures, allocated in raidattach(). */
struct raid_softc *raid_softc;

/* disklabel helpers */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* per-unit lock, taken around the bodies of raidopen() and raidclose() */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);

/*
 * Array with one struct device per unit, filled in by raidattach();
 * rf_buildroothack() may point booted_device at one of its entries.
 */
struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr);  */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
/* scheduled from raidattach(); its argument is the list of config sets */
void rf_buildroothack(void *);

/* autoconfiguration support */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

/*
 * Non-zero enables autoconfiguration in raidattach(); raidattach() forces
 * it to 1 when the kernel is built with RAID_AUTOCONFIG.
 */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
			          Note that this is overridden by having
			          RAID_AUTOCONFIG as an option in the
			          kernel config file.  */
    301 
    302 void
    303 raidattach(num)
    304 	int     num;
    305 {
    306 	int raidID;
    307 	int i, rc;
    308 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    309 	RF_ConfigSet_t *config_sets;
    310 
    311 #ifdef DEBUG
    312 	printf("raidattach: Asked for %d units\n", num);
    313 #endif
    314 
    315 	if (num <= 0) {
    316 #ifdef DIAGNOSTIC
    317 		panic("raidattach: count <= 0");
    318 #endif
    319 		return;
    320 	}
    321 	/* This is where all the initialization stuff gets done. */
    322 
    323 	numraid = num;
    324 
    325 	/* Make some space for requested number of units... */
    326 
    327 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    328 	if (raidPtrs == NULL) {
    329 		panic("raidPtrs is NULL!!\n");
    330 	}
    331 
    332 	/* Initialize the component buffer pool. */
    333 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    334 	    0, 0, "raidpl", NULL);
    335 
    336 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    337 	if (rc) {
    338 		RF_PANIC();
    339 	}
    340 
    341 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    342 
    343 	for (i = 0; i < num; i++)
    344 		raidPtrs[i] = NULL;
    345 	rc = rf_BootRaidframe();
    346 	if (rc == 0)
    347 		printf("Kernelized RAIDframe activated\n");
    348 	else
    349 		panic("Serious error booting RAID!!\n");
    350 
    351 	/* put together some datastructures like the CCD device does.. This
    352 	 * lets us lock the device and what-not when it gets opened. */
    353 
    354 	raid_softc = (struct raid_softc *)
    355 		malloc(num * sizeof(struct raid_softc),
    356 		       M_RAIDFRAME, M_NOWAIT);
    357 	if (raid_softc == NULL) {
    358 		printf("WARNING: no memory for RAIDframe driver\n");
    359 		return;
    360 	}
    361 
    362 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    363 
    364 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    365 					      M_RAIDFRAME, M_NOWAIT);
    366 	if (raidrootdev == NULL) {
    367 		panic("No memory for RAIDframe driver!!?!?!\n");
    368 	}
    369 
    370 	for (raidID = 0; raidID < num; raidID++) {
    371 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    372 
    373 		raidrootdev[raidID].dv_class  = DV_DISK;
    374 		raidrootdev[raidID].dv_cfdata = NULL;
    375 		raidrootdev[raidID].dv_unit   = raidID;
    376 		raidrootdev[raidID].dv_parent = NULL;
    377 		raidrootdev[raidID].dv_flags  = 0;
    378 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    379 
    380 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    381 			  (RF_Raid_t *));
    382 		if (raidPtrs[raidID] == NULL) {
    383 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    384 			numraid = raidID;
    385 			return;
    386 		}
    387 	}
    388 
    389 #ifdef RAID_AUTOCONFIG
    390 	raidautoconfig = 1;
    391 #endif
    392 
    393 if (raidautoconfig) {
    394 	/* 1. locate all RAID components on the system */
    395 
    396 #if DEBUG
    397 	printf("Searching for raid components...\n");
    398 #endif
    399 	ac_list = rf_find_raid_components();
    400 
    401 	/* 2. sort them into their respective sets */
    402 
    403 	config_sets = rf_create_auto_sets(ac_list);
    404 
    405 	/* 3. evaluate each set and configure the valid ones
    406 	   This gets done in rf_buildroothack() */
    407 
    408 	/* schedule the creation of the thread to do the
    409 	   "/ on RAID" stuff */
    410 
    411 	kthread_create(rf_buildroothack,config_sets);
    412 
    413 }
    414 
    415 }
    416 
    417 void
    418 rf_buildroothack(arg)
    419 	void *arg;
    420 {
    421 	RF_ConfigSet_t *config_sets = arg;
    422 	RF_ConfigSet_t *cset;
    423 	RF_ConfigSet_t *next_cset;
    424 	int retcode;
    425 	int raidID;
    426 	int rootID;
    427 	int num_root;
    428 
    429 	rootID = 0;
    430 	num_root = 0;
    431 	cset = config_sets;
    432 	while(cset != NULL ) {
    433 		next_cset = cset->next;
    434 		if (rf_have_enough_components(cset) &&
    435 		    cset->ac->clabel->autoconfigure==1) {
    436 			retcode = rf_auto_config_set(cset,&raidID);
    437 			if (!retcode) {
    438 				if (cset->rootable) {
    439 					rootID = raidID;
    440 					num_root++;
    441 				}
    442 			} else {
    443 				/* The autoconfig didn't work :( */
    444 #if DEBUG
    445 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    446 #endif
    447 				rf_release_all_vps(cset);
    448 			}
    449 		} else {
    450 			/* we're not autoconfiguring this set...
    451 			   release the associated resources */
    452 			rf_release_all_vps(cset);
    453 		}
    454 		/* cleanup */
    455 		rf_cleanup_config_set(cset);
    456 		cset = next_cset;
    457 	}
    458 
    459 	/* we found something bootable... */
    460 
    461 	if (num_root == 1) {
    462 		booted_device = &raidrootdev[rootID];
    463 	} else if (num_root > 1) {
    464 		/* we can't guess.. require the user to answer... */
    465 		boothowto |= RB_ASKNAME;
    466 	}
    467 }
    468 
    469 
    470 int
    471 raidsize(dev)
    472 	dev_t   dev;
    473 {
    474 	struct raid_softc *rs;
    475 	struct disklabel *lp;
    476 	int     part, unit, omask, size;
    477 
    478 	unit = raidunit(dev);
    479 	if (unit >= numraid)
    480 		return (-1);
    481 	rs = &raid_softc[unit];
    482 
    483 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    484 		return (-1);
    485 
    486 	part = DISKPART(dev);
    487 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    488 	lp = rs->sc_dkdev.dk_label;
    489 
    490 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    491 		return (-1);
    492 
    493 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    494 		size = -1;
    495 	else
    496 		size = lp->d_partitions[part].p_size *
    497 		    (lp->d_secsize / DEV_BSIZE);
    498 
    499 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    500 		return (-1);
    501 
    502 	return (size);
    503 
    504 }
    505 
    506 int
    507 raiddump(dev, blkno, va, size)
    508 	dev_t   dev;
    509 	daddr_t blkno;
    510 	caddr_t va;
    511 	size_t  size;
    512 {
    513 	/* Not implemented. */
    514 	return ENXIO;
    515 }
    516 /* ARGSUSED */
    517 int
    518 raidopen(dev, flags, fmt, p)
    519 	dev_t   dev;
    520 	int     flags, fmt;
    521 	struct proc *p;
    522 {
    523 	int     unit = raidunit(dev);
    524 	struct raid_softc *rs;
    525 	struct disklabel *lp;
    526 	int     part, pmask;
    527 	int     error = 0;
    528 
    529 	if (unit >= numraid)
    530 		return (ENXIO);
    531 	rs = &raid_softc[unit];
    532 
    533 	if ((error = raidlock(rs)) != 0)
    534 		return (error);
    535 	lp = rs->sc_dkdev.dk_label;
    536 
    537 	part = DISKPART(dev);
    538 	pmask = (1 << part);
    539 
    540 	db1_printf(("Opening raid device number: %d partition: %d\n",
    541 		unit, part));
    542 
    543 
    544 	if ((rs->sc_flags & RAIDF_INITED) &&
    545 	    (rs->sc_dkdev.dk_openmask == 0))
    546 		raidgetdisklabel(dev);
    547 
    548 	/* make sure that this partition exists */
    549 
    550 	if (part != RAW_PART) {
    551 		db1_printf(("Not a raw partition..\n"));
    552 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    553 		    ((part >= lp->d_npartitions) ||
    554 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    555 			error = ENXIO;
    556 			raidunlock(rs);
    557 			db1_printf(("Bailing out...\n"));
    558 			return (error);
    559 		}
    560 	}
    561 	/* Prevent this unit from being unconfigured while open. */
    562 	switch (fmt) {
    563 	case S_IFCHR:
    564 		rs->sc_dkdev.dk_copenmask |= pmask;
    565 		break;
    566 
    567 	case S_IFBLK:
    568 		rs->sc_dkdev.dk_bopenmask |= pmask;
    569 		break;
    570 	}
    571 
    572 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    573 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    574 		/* First one... mark things as dirty... Note that we *MUST*
    575 		 have done a configure before this.  I DO NOT WANT TO BE
    576 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    577 		 THAT THEY BELONG TOGETHER!!!!! */
    578 		/* XXX should check to see if we're only open for reading
    579 		   here... If so, we needn't do this, but then need some
    580 		   other way of keeping track of what's happened.. */
    581 
    582 		rf_markalldirty( raidPtrs[unit] );
    583 	}
    584 
    585 
    586 	rs->sc_dkdev.dk_openmask =
    587 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    588 
    589 	raidunlock(rs);
    590 
    591 	return (error);
    592 
    593 
    594 }
    595 /* ARGSUSED */
    596 int
    597 raidclose(dev, flags, fmt, p)
    598 	dev_t   dev;
    599 	int     flags, fmt;
    600 	struct proc *p;
    601 {
    602 	int     unit = raidunit(dev);
    603 	struct raid_softc *rs;
    604 	int     error = 0;
    605 	int     part;
    606 
    607 	if (unit >= numraid)
    608 		return (ENXIO);
    609 	rs = &raid_softc[unit];
    610 
    611 	if ((error = raidlock(rs)) != 0)
    612 		return (error);
    613 
    614 	part = DISKPART(dev);
    615 
    616 	/* ...that much closer to allowing unconfiguration... */
    617 	switch (fmt) {
    618 	case S_IFCHR:
    619 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    620 		break;
    621 
    622 	case S_IFBLK:
    623 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    624 		break;
    625 	}
    626 	rs->sc_dkdev.dk_openmask =
    627 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    628 
    629 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    630 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    631 		/* Last one... device is not unconfigured yet.
    632 		   Device shutdown has taken care of setting the
    633 		   clean bits if RAIDF_INITED is not set
    634 		   mark things as clean... */
    635 #if 0
    636 		printf("Last one on raid%d.  Updating status.\n",unit);
    637 #endif
    638 		rf_update_component_labels(raidPtrs[unit],
    639 						 RF_FINAL_COMPONENT_UPDATE);
    640 		if (doing_shutdown) {
    641 			/* last one, and we're going down, so
    642 			   lights out for this RAID set too. */
    643 			error = rf_Shutdown(raidPtrs[unit]);
    644 
    645 			/* It's no longer initialized... */
    646 			rs->sc_flags &= ~RAIDF_INITED;
    647 
    648 			/* Detach the disk. */
    649 			disk_detach(&rs->sc_dkdev);
    650 		}
    651 	}
    652 
    653 	raidunlock(rs);
    654 	return (0);
    655 
    656 }
    657 
    658 void
    659 raidstrategy(bp)
    660 	struct buf *bp;
    661 {
    662 	int s;
    663 
    664 	unsigned int raidID = raidunit(bp->b_dev);
    665 	RF_Raid_t *raidPtr;
    666 	struct raid_softc *rs = &raid_softc[raidID];
    667 	struct disklabel *lp;
    668 	int     wlabel;
    669 
    670 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    671 		bp->b_error = ENXIO;
    672 		bp->b_flags |= B_ERROR;
    673 		bp->b_resid = bp->b_bcount;
    674 		biodone(bp);
    675 		return;
    676 	}
    677 	if (raidID >= numraid || !raidPtrs[raidID]) {
    678 		bp->b_error = ENODEV;
    679 		bp->b_flags |= B_ERROR;
    680 		bp->b_resid = bp->b_bcount;
    681 		biodone(bp);
    682 		return;
    683 	}
    684 	raidPtr = raidPtrs[raidID];
    685 	if (!raidPtr->valid) {
    686 		bp->b_error = ENODEV;
    687 		bp->b_flags |= B_ERROR;
    688 		bp->b_resid = bp->b_bcount;
    689 		biodone(bp);
    690 		return;
    691 	}
    692 	if (bp->b_bcount == 0) {
    693 		db1_printf(("b_bcount is zero..\n"));
    694 		biodone(bp);
    695 		return;
    696 	}
    697 	lp = rs->sc_dkdev.dk_label;
    698 
    699 	/*
    700 	 * Do bounds checking and adjust transfer.  If there's an
    701 	 * error, the bounds check will flag that for us.
    702 	 */
    703 
    704 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    705 	if (DISKPART(bp->b_dev) != RAW_PART)
    706 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    707 			db1_printf(("Bounds check failed!!:%d %d\n",
    708 				(int) bp->b_blkno, (int) wlabel));
    709 			biodone(bp);
    710 			return;
    711 		}
    712 	s = splbio();
    713 
    714 	bp->b_resid = 0;
    715 
    716 	/* stuff it onto our queue */
    717 	BUFQ_PUT(&rs->buf_queue, bp);
    718 
    719 	raidstart(raidPtrs[raidID]);
    720 
    721 	splx(s);
    722 }
    723 /* ARGSUSED */
    724 int
    725 raidread(dev, uio, flags)
    726 	dev_t   dev;
    727 	struct uio *uio;
    728 	int     flags;
    729 {
    730 	int     unit = raidunit(dev);
    731 	struct raid_softc *rs;
    732 	int     part;
    733 
    734 	if (unit >= numraid)
    735 		return (ENXIO);
    736 	rs = &raid_softc[unit];
    737 
    738 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    739 		return (ENXIO);
    740 	part = DISKPART(dev);
    741 
    742 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    743 
    744 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    745 
    746 }
    747 /* ARGSUSED */
    748 int
    749 raidwrite(dev, uio, flags)
    750 	dev_t   dev;
    751 	struct uio *uio;
    752 	int     flags;
    753 {
    754 	int     unit = raidunit(dev);
    755 	struct raid_softc *rs;
    756 
    757 	if (unit >= numraid)
    758 		return (ENXIO);
    759 	rs = &raid_softc[unit];
    760 
    761 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    762 		return (ENXIO);
    763 	db1_printf(("raidwrite\n"));
    764 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    765 
    766 }
    767 
    768 int
    769 raidioctl(dev, cmd, data, flag, p)
    770 	dev_t   dev;
    771 	u_long  cmd;
    772 	caddr_t data;
    773 	int     flag;
    774 	struct proc *p;
    775 {
    776 	int     unit = raidunit(dev);
    777 	int     error = 0;
    778 	int     part, pmask;
    779 	struct raid_softc *rs;
    780 	RF_Config_t *k_cfg, *u_cfg;
    781 	RF_Raid_t *raidPtr;
    782 	RF_RaidDisk_t *diskPtr;
    783 	RF_AccTotals_t *totals;
    784 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    785 	u_char *specific_buf;
    786 	int retcode = 0;
    787 	int row;
    788 	int column;
    789 	int raidid;
    790 	struct rf_recon_req *rrcopy, *rr;
    791 	RF_ComponentLabel_t *clabel;
    792 	RF_ComponentLabel_t ci_label;
    793 	RF_ComponentLabel_t **clabel_ptr;
    794 	RF_SingleComponent_t *sparePtr,*componentPtr;
    795 	RF_SingleComponent_t hot_spare;
    796 	RF_SingleComponent_t component;
    797 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    798 	int i, j, d;
    799 #ifdef __HAVE_OLD_DISKLABEL
    800 	struct disklabel newlabel;
    801 #endif
    802 
    803 	if (unit >= numraid)
    804 		return (ENXIO);
    805 	rs = &raid_softc[unit];
    806 	raidPtr = raidPtrs[unit];
    807 
    808 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    809 		(int) DISKPART(dev), (int) unit, (int) cmd));
    810 
    811 	/* Must be open for writes for these commands... */
    812 	switch (cmd) {
    813 	case DIOCSDINFO:
    814 	case DIOCWDINFO:
    815 #ifdef __HAVE_OLD_DISKLABEL
    816 	case ODIOCWDINFO:
    817 	case ODIOCSDINFO:
    818 #endif
    819 	case DIOCWLABEL:
    820 		if ((flag & FWRITE) == 0)
    821 			return (EBADF);
    822 	}
    823 
    824 	/* Must be initialized for these... */
    825 	switch (cmd) {
    826 	case DIOCGDINFO:
    827 	case DIOCSDINFO:
    828 	case DIOCWDINFO:
    829 #ifdef __HAVE_OLD_DISKLABEL
    830 	case ODIOCGDINFO:
    831 	case ODIOCWDINFO:
    832 	case ODIOCSDINFO:
    833 	case ODIOCGDEFLABEL:
    834 #endif
    835 	case DIOCGPART:
    836 	case DIOCWLABEL:
    837 	case DIOCGDEFLABEL:
    838 	case RAIDFRAME_SHUTDOWN:
    839 	case RAIDFRAME_REWRITEPARITY:
    840 	case RAIDFRAME_GET_INFO:
    841 	case RAIDFRAME_RESET_ACCTOTALS:
    842 	case RAIDFRAME_GET_ACCTOTALS:
    843 	case RAIDFRAME_KEEP_ACCTOTALS:
    844 	case RAIDFRAME_GET_SIZE:
    845 	case RAIDFRAME_FAIL_DISK:
    846 	case RAIDFRAME_COPYBACK:
    847 	case RAIDFRAME_CHECK_RECON_STATUS:
    848 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    849 	case RAIDFRAME_GET_COMPONENT_LABEL:
    850 	case RAIDFRAME_SET_COMPONENT_LABEL:
    851 	case RAIDFRAME_ADD_HOT_SPARE:
    852 	case RAIDFRAME_REMOVE_HOT_SPARE:
    853 	case RAIDFRAME_INIT_LABELS:
    854 	case RAIDFRAME_REBUILD_IN_PLACE:
    855 	case RAIDFRAME_CHECK_PARITY:
    856 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    857 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    858 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    859 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    860 	case RAIDFRAME_SET_AUTOCONFIG:
    861 	case RAIDFRAME_SET_ROOT:
    862 	case RAIDFRAME_DELETE_COMPONENT:
    863 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    864 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    865 			return (ENXIO);
    866 	}
    867 
    868 	switch (cmd) {
    869 
    870 		/* configure the system */
    871 	case RAIDFRAME_CONFIGURE:
    872 
    873 		if (raidPtr->valid) {
    874 			/* There is a valid RAID set running on this unit! */
    875 			printf("raid%d: Device already configured!\n",unit);
    876 			return(EINVAL);
    877 		}
    878 
    879 		/* copy-in the configuration information */
    880 		/* data points to a pointer to the configuration structure */
    881 
    882 		u_cfg = *((RF_Config_t **) data);
    883 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    884 		if (k_cfg == NULL) {
    885 			return (ENOMEM);
    886 		}
    887 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    888 		    sizeof(RF_Config_t));
    889 		if (retcode) {
    890 			RF_Free(k_cfg, sizeof(RF_Config_t));
    891 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    892 				retcode));
    893 			return (retcode);
    894 		}
    895 		/* allocate a buffer for the layout-specific data, and copy it
    896 		 * in */
    897 		if (k_cfg->layoutSpecificSize) {
    898 			if (k_cfg->layoutSpecificSize > 10000) {
    899 				/* sanity check */
    900 				RF_Free(k_cfg, sizeof(RF_Config_t));
    901 				return (EINVAL);
    902 			}
    903 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    904 			    (u_char *));
    905 			if (specific_buf == NULL) {
    906 				RF_Free(k_cfg, sizeof(RF_Config_t));
    907 				return (ENOMEM);
    908 			}
    909 			retcode = copyin(k_cfg->layoutSpecific,
    910 			    (caddr_t) specific_buf,
    911 			    k_cfg->layoutSpecificSize);
    912 			if (retcode) {
    913 				RF_Free(k_cfg, sizeof(RF_Config_t));
    914 				RF_Free(specific_buf,
    915 					k_cfg->layoutSpecificSize);
    916 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    917 					retcode));
    918 				return (retcode);
    919 			}
    920 		} else
    921 			specific_buf = NULL;
    922 		k_cfg->layoutSpecific = specific_buf;
    923 
    924 		/* should do some kind of sanity check on the configuration.
    925 		 * Store the sum of all the bytes in the last byte? */
    926 
    927 		/* configure the system */
    928 
    929 		/*
    930 		 * Clear the entire RAID descriptor, just to make sure
    931 		 *  there is no stale data left in the case of a
    932 		 *  reconfiguration
    933 		 */
    934 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    935 		raidPtr->raidid = unit;
    936 
    937 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    938 
    939 		if (retcode == 0) {
    940 
    941 			/* allow this many simultaneous IO's to
    942 			   this RAID device */
    943 			raidPtr->openings = RAIDOUTSTANDING;
    944 
    945 			raidinit(raidPtr);
    946 			rf_markalldirty(raidPtr);
    947 		}
    948 		/* free the buffers.  No return code here. */
    949 		if (k_cfg->layoutSpecificSize) {
    950 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    951 		}
    952 		RF_Free(k_cfg, sizeof(RF_Config_t));
    953 
    954 		return (retcode);
    955 
    956 		/* shutdown the system */
    957 	case RAIDFRAME_SHUTDOWN:
    958 
    959 		if ((error = raidlock(rs)) != 0)
    960 			return (error);
    961 
    962 		/*
    963 		 * If somebody has a partition mounted, we shouldn't
    964 		 * shutdown.
    965 		 */
    966 
    967 		part = DISKPART(dev);
    968 		pmask = (1 << part);
    969 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    970 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    971 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    972 			raidunlock(rs);
    973 			return (EBUSY);
    974 		}
    975 
    976 		retcode = rf_Shutdown(raidPtr);
    977 
    978 		/* It's no longer initialized... */
    979 		rs->sc_flags &= ~RAIDF_INITED;
    980 
    981 		/* Detach the disk. */
    982 		disk_detach(&rs->sc_dkdev);
    983 
    984 		raidunlock(rs);
    985 
    986 		return (retcode);
    987 	case RAIDFRAME_GET_COMPONENT_LABEL:
    988 		clabel_ptr = (RF_ComponentLabel_t **) data;
    989 		/* need to read the component label for the disk indicated
    990 		   by row,column in clabel */
    991 
     992 		/* For practice, let's get it directly from disk, rather
    993 		   than from the in-core copy */
    994 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    995 			   (RF_ComponentLabel_t *));
    996 		if (clabel == NULL)
    997 			return (ENOMEM);
    998 
    999 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1000 
   1001 		retcode = copyin( *clabel_ptr, clabel,
   1002 				  sizeof(RF_ComponentLabel_t));
   1003 
   1004 		if (retcode) {
   1005 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1006 			return(retcode);
   1007 		}
   1008 
   1009 		row = clabel->row;
   1010 		column = clabel->column;
   1011 
   1012 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1013 		    (column < 0) || (column >= raidPtr->numCol +
   1014 				     raidPtr->numSpare)) {
   1015 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1016 			return(EINVAL);
   1017 		}
   1018 
   1019 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1020 				raidPtr->raid_cinfo[row][column].ci_vp,
   1021 				clabel );
   1022 
   1023 		retcode = copyout((caddr_t) clabel,
   1024 				  (caddr_t) *clabel_ptr,
   1025 				  sizeof(RF_ComponentLabel_t));
   1026 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1027 		return (retcode);
   1028 
   1029 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1030 		clabel = (RF_ComponentLabel_t *) data;
   1031 
   1032 		/* XXX check the label for valid stuff... */
   1033 		/* Note that some things *should not* get modified --
   1034 		   the user should be re-initing the labels instead of
   1035 		   trying to patch things.
   1036 		   */
   1037 
   1038 		raidid = raidPtr->raidid;
   1039 		printf("raid%d: Got component label:\n", raidid);
   1040 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1041 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1042 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1043 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1044 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1045 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1046 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1047 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1048 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1049 
   1050 		row = clabel->row;
   1051 		column = clabel->column;
   1052 
   1053 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1054 		    (column < 0) || (column >= raidPtr->numCol)) {
   1055 			return(EINVAL);
   1056 		}
   1057 
   1058 		/* XXX this isn't allowed to do anything for now :-) */
   1059 
   1060 		/* XXX and before it is, we need to fill in the rest
   1061 		   of the fields!?!?!?! */
   1062 #if 0
   1063 		raidwrite_component_label(
   1064                             raidPtr->Disks[row][column].dev,
   1065 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1066 			    clabel );
   1067 #endif
   1068 		return (0);
   1069 
   1070 	case RAIDFRAME_INIT_LABELS:
   1071 		clabel = (RF_ComponentLabel_t *) data;
   1072 		/*
   1073 		   we only want the serial number from
   1074 		   the above.  We get all the rest of the information
   1075 		   from the config that was used to create this RAID
   1076 		   set.
   1077 		   */
   1078 
   1079 		raidPtr->serial_number = clabel->serial_number;
   1080 
   1081 		raid_init_component_label(raidPtr, &ci_label);
   1082 		ci_label.serial_number = clabel->serial_number;
   1083 
   1084 		for(row=0;row<raidPtr->numRow;row++) {
   1085 			ci_label.row = row;
   1086 			for(column=0;column<raidPtr->numCol;column++) {
   1087 				diskPtr = &raidPtr->Disks[row][column];
   1088 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1089 					ci_label.partitionSize = diskPtr->partitionSize;
   1090 					ci_label.column = column;
   1091 					raidwrite_component_label(
   1092 					  raidPtr->Disks[row][column].dev,
   1093 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1094 					  &ci_label );
   1095 				}
   1096 			}
   1097 		}
   1098 
   1099 		return (retcode);
   1100 	case RAIDFRAME_SET_AUTOCONFIG:
   1101 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1102 		printf("raid%d: New autoconfig value is: %d\n",
   1103 		       raidPtr->raidid, d);
   1104 		*(int *) data = d;
   1105 		return (retcode);
   1106 
   1107 	case RAIDFRAME_SET_ROOT:
   1108 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1109 		printf("raid%d: New rootpartition value is: %d\n",
   1110 		       raidPtr->raidid, d);
   1111 		*(int *) data = d;
   1112 		return (retcode);
   1113 
   1114 		/* initialize all parity */
   1115 	case RAIDFRAME_REWRITEPARITY:
   1116 
   1117 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1118 			/* Parity for RAID 0 is trivially correct */
   1119 			raidPtr->parity_good = RF_RAID_CLEAN;
   1120 			return(0);
   1121 		}
   1122 
   1123 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1124 			/* Re-write is already in progress! */
   1125 			return(EINVAL);
   1126 		}
   1127 
   1128 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1129 					   rf_RewriteParityThread,
   1130 					   raidPtr,"raid_parity");
   1131 		return (retcode);
   1132 
   1133 
   1134 	case RAIDFRAME_ADD_HOT_SPARE:
   1135 		sparePtr = (RF_SingleComponent_t *) data;
   1136 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1137 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1138 		return(retcode);
   1139 
   1140 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1141 		return(retcode);
   1142 
   1143 	case RAIDFRAME_DELETE_COMPONENT:
   1144 		componentPtr = (RF_SingleComponent_t *)data;
   1145 		memcpy( &component, componentPtr,
   1146 			sizeof(RF_SingleComponent_t));
   1147 		retcode = rf_delete_component(raidPtr, &component);
   1148 		return(retcode);
   1149 
   1150 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1151 		componentPtr = (RF_SingleComponent_t *)data;
   1152 		memcpy( &component, componentPtr,
   1153 			sizeof(RF_SingleComponent_t));
   1154 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1155 		return(retcode);
   1156 
   1157 	case RAIDFRAME_REBUILD_IN_PLACE:
   1158 
   1159 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1160 			/* Can't do this on a RAID 0!! */
   1161 			return(EINVAL);
   1162 		}
   1163 
   1164 		if (raidPtr->recon_in_progress == 1) {
   1165 			/* a reconstruct is already in progress! */
   1166 			return(EINVAL);
   1167 		}
   1168 
   1169 		componentPtr = (RF_SingleComponent_t *) data;
   1170 		memcpy( &component, componentPtr,
   1171 			sizeof(RF_SingleComponent_t));
   1172 		row = component.row;
   1173 		column = component.column;
   1174 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1175 		       row, column);
   1176 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1177 		    (column < 0) || (column >= raidPtr->numCol)) {
   1178 			return(EINVAL);
   1179 		}
   1180 
   1181 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1182 		if (rrcopy == NULL)
   1183 			return(ENOMEM);
   1184 
   1185 		rrcopy->raidPtr = (void *) raidPtr;
   1186 		rrcopy->row = row;
   1187 		rrcopy->col = column;
   1188 
   1189 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1190 					   rf_ReconstructInPlaceThread,
   1191 					   rrcopy,"raid_reconip");
   1192 		return(retcode);
   1193 
   1194 	case RAIDFRAME_GET_INFO:
   1195 		if (!raidPtr->valid)
   1196 			return (ENODEV);
   1197 		ucfgp = (RF_DeviceConfig_t **) data;
   1198 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1199 			  (RF_DeviceConfig_t *));
   1200 		if (d_cfg == NULL)
   1201 			return (ENOMEM);
   1202 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1203 		d_cfg->rows = raidPtr->numRow;
   1204 		d_cfg->cols = raidPtr->numCol;
   1205 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1206 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1207 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1208 			return (ENOMEM);
   1209 		}
   1210 		d_cfg->nspares = raidPtr->numSpare;
   1211 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1212 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1213 			return (ENOMEM);
   1214 		}
   1215 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1216 		d = 0;
   1217 		for (i = 0; i < d_cfg->rows; i++) {
   1218 			for (j = 0; j < d_cfg->cols; j++) {
   1219 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1220 				d++;
   1221 			}
   1222 		}
   1223 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1224 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1225 		}
   1226 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1227 				  sizeof(RF_DeviceConfig_t));
   1228 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1229 
   1230 		return (retcode);
   1231 
   1232 	case RAIDFRAME_CHECK_PARITY:
   1233 		*(int *) data = raidPtr->parity_good;
   1234 		return (0);
   1235 
   1236 	case RAIDFRAME_RESET_ACCTOTALS:
   1237 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1238 		return (0);
   1239 
   1240 	case RAIDFRAME_GET_ACCTOTALS:
   1241 		totals = (RF_AccTotals_t *) data;
   1242 		*totals = raidPtr->acc_totals;
   1243 		return (0);
   1244 
   1245 	case RAIDFRAME_KEEP_ACCTOTALS:
   1246 		raidPtr->keep_acc_totals = *(int *)data;
   1247 		return (0);
   1248 
   1249 	case RAIDFRAME_GET_SIZE:
   1250 		*(int *) data = raidPtr->totalSectors;
   1251 		return (0);
   1252 
   1253 		/* fail a disk & optionally start reconstruction */
   1254 	case RAIDFRAME_FAIL_DISK:
   1255 
   1256 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1257 			/* Can't do this on a RAID 0!! */
   1258 			return(EINVAL);
   1259 		}
   1260 
   1261 		rr = (struct rf_recon_req *) data;
   1262 
   1263 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1264 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1265 			return (EINVAL);
   1266 
   1267 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1268 		       unit, rr->row, rr->col);
   1269 
   1270 		/* make a copy of the recon request so that we don't rely on
   1271 		 * the user's buffer */
   1272 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1273 		if (rrcopy == NULL)
   1274 			return(ENOMEM);
   1275 		memcpy(rrcopy, rr, sizeof(*rr));
   1276 		rrcopy->raidPtr = (void *) raidPtr;
   1277 
   1278 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1279 					   rf_ReconThread,
   1280 					   rrcopy,"raid_recon");
   1281 		return (0);
   1282 
   1283 		/* invoke a copyback operation after recon on whatever disk
   1284 		 * needs it, if any */
   1285 	case RAIDFRAME_COPYBACK:
   1286 
   1287 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1288 			/* This makes no sense on a RAID 0!! */
   1289 			return(EINVAL);
   1290 		}
   1291 
   1292 		if (raidPtr->copyback_in_progress == 1) {
   1293 			/* Copyback is already in progress! */
   1294 			return(EINVAL);
   1295 		}
   1296 
   1297 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1298 					   rf_CopybackThread,
   1299 					   raidPtr,"raid_copyback");
   1300 		return (retcode);
   1301 
   1302 		/* return the percentage completion of reconstruction */
   1303 	case RAIDFRAME_CHECK_RECON_STATUS:
   1304 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1305 			/* This makes no sense on a RAID 0, so tell the
   1306 			   user it's done. */
   1307 			*(int *) data = 100;
   1308 			return(0);
   1309 		}
   1310 		row = 0; /* XXX we only consider a single row... */
   1311 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1312 			*(int *) data = 100;
   1313 		else
   1314 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1315 		return (0);
   1316 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1317 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1318 		row = 0; /* XXX we only consider a single row... */
   1319 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1320 			progressInfo.remaining = 0;
   1321 			progressInfo.completed = 100;
   1322 			progressInfo.total = 100;
   1323 		} else {
   1324 			progressInfo.total =
   1325 				raidPtr->reconControl[row]->numRUsTotal;
   1326 			progressInfo.completed =
   1327 				raidPtr->reconControl[row]->numRUsComplete;
   1328 			progressInfo.remaining = progressInfo.total -
   1329 				progressInfo.completed;
   1330 		}
   1331 		retcode = copyout((caddr_t) &progressInfo,
   1332 				  (caddr_t) *progressInfoPtr,
   1333 				  sizeof(RF_ProgressInfo_t));
   1334 		return (retcode);
   1335 
   1336 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1337 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1338 			/* This makes no sense on a RAID 0, so tell the
   1339 			   user it's done. */
   1340 			*(int *) data = 100;
   1341 			return(0);
   1342 		}
   1343 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1344 			*(int *) data = 100 *
   1345 				raidPtr->parity_rewrite_stripes_done /
   1346 				raidPtr->Layout.numStripe;
   1347 		} else {
   1348 			*(int *) data = 100;
   1349 		}
   1350 		return (0);
   1351 
   1352 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1353 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1354 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1355 			progressInfo.total = raidPtr->Layout.numStripe;
   1356 			progressInfo.completed =
   1357 				raidPtr->parity_rewrite_stripes_done;
   1358 			progressInfo.remaining = progressInfo.total -
   1359 				progressInfo.completed;
   1360 		} else {
   1361 			progressInfo.remaining = 0;
   1362 			progressInfo.completed = 100;
   1363 			progressInfo.total = 100;
   1364 		}
   1365 		retcode = copyout((caddr_t) &progressInfo,
   1366 				  (caddr_t) *progressInfoPtr,
   1367 				  sizeof(RF_ProgressInfo_t));
   1368 		return (retcode);
   1369 
   1370 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1371 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1372 			/* This makes no sense on a RAID 0 */
   1373 			*(int *) data = 100;
   1374 			return(0);
   1375 		}
   1376 		if (raidPtr->copyback_in_progress == 1) {
   1377 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1378 				raidPtr->Layout.numStripe;
   1379 		} else {
   1380 			*(int *) data = 100;
   1381 		}
   1382 		return (0);
   1383 
   1384 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1385 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1386 		if (raidPtr->copyback_in_progress == 1) {
   1387 			progressInfo.total = raidPtr->Layout.numStripe;
   1388 			progressInfo.completed =
   1389 				raidPtr->copyback_stripes_done;
   1390 			progressInfo.remaining = progressInfo.total -
   1391 				progressInfo.completed;
   1392 		} else {
   1393 			progressInfo.remaining = 0;
   1394 			progressInfo.completed = 100;
   1395 			progressInfo.total = 100;
   1396 		}
   1397 		retcode = copyout((caddr_t) &progressInfo,
   1398 				  (caddr_t) *progressInfoPtr,
   1399 				  sizeof(RF_ProgressInfo_t));
   1400 		return (retcode);
   1401 
   1402 		/* the sparetable daemon calls this to wait for the kernel to
   1403 		 * need a spare table. this ioctl does not return until a
   1404 		 * spare table is needed. XXX -- calling mpsleep here in the
   1405 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1406 		 * -- I should either compute the spare table in the kernel,
   1407 		 * or have a different -- XXX XXX -- interface (a different
   1408 		 * character device) for delivering the table     -- XXX */
   1409 #if 0
   1410 	case RAIDFRAME_SPARET_WAIT:
   1411 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1412 		while (!rf_sparet_wait_queue)
   1413 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1414 		waitreq = rf_sparet_wait_queue;
   1415 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1416 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1417 
   1418 		/* structure assignment */
   1419 		*((RF_SparetWait_t *) data) = *waitreq;
   1420 
   1421 		RF_Free(waitreq, sizeof(*waitreq));
   1422 		return (0);
   1423 
   1424 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1425 		 * code in it that will cause the daemon to exit */
   1426 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1427 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1428 		waitreq->fcol = -1;
   1429 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1430 		waitreq->next = rf_sparet_wait_queue;
   1431 		rf_sparet_wait_queue = waitreq;
   1432 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1433 		wakeup(&rf_sparet_wait_queue);
   1434 		return (0);
   1435 
   1436 		/* used by the spare table daemon to deliver a spare table
   1437 		 * into the kernel */
   1438 	case RAIDFRAME_SEND_SPARET:
   1439 
   1440 		/* install the spare table */
   1441 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1442 
   1443 		/* respond to the requestor.  the return status of the spare
   1444 		 * table installation is passed in the "fcol" field */
   1445 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1446 		waitreq->fcol = retcode;
   1447 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1448 		waitreq->next = rf_sparet_resp_queue;
   1449 		rf_sparet_resp_queue = waitreq;
   1450 		wakeup(&rf_sparet_resp_queue);
   1451 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1452 
   1453 		return (retcode);
   1454 #endif
   1455 
   1456 	default:
   1457 		break; /* fall through to the os-specific code below */
   1458 
   1459 	}
   1460 
   1461 	if (!raidPtr->valid)
   1462 		return (EINVAL);
   1463 
   1464 	/*
   1465 	 * Add support for "regular" device ioctls here.
   1466 	 */
   1467 
   1468 	switch (cmd) {
   1469 	case DIOCGDINFO:
   1470 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1471 		break;
   1472 #ifdef __HAVE_OLD_DISKLABEL
   1473 	case ODIOCGDINFO:
   1474 		newlabel = *(rs->sc_dkdev.dk_label);
   1475 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1476 			return ENOTTY;
   1477 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1478 		break;
   1479 #endif
   1480 
   1481 	case DIOCGPART:
   1482 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1483 		((struct partinfo *) data)->part =
   1484 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1485 		break;
   1486 
   1487 	case DIOCWDINFO:
   1488 	case DIOCSDINFO:
   1489 #ifdef __HAVE_OLD_DISKLABEL
   1490 	case ODIOCWDINFO:
   1491 	case ODIOCSDINFO:
   1492 #endif
   1493 	{
   1494 		struct disklabel *lp;
   1495 #ifdef __HAVE_OLD_DISKLABEL
   1496 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1497 			memset(&newlabel, 0, sizeof newlabel);
   1498 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1499 			lp = &newlabel;
   1500 		} else
   1501 #endif
   1502 		lp = (struct disklabel *)data;
   1503 
   1504 		if ((error = raidlock(rs)) != 0)
   1505 			return (error);
   1506 
   1507 		rs->sc_flags |= RAIDF_LABELLING;
   1508 
   1509 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1510 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1511 		if (error == 0) {
   1512 			if (cmd == DIOCWDINFO
   1513 #ifdef __HAVE_OLD_DISKLABEL
   1514 			    || cmd == ODIOCWDINFO
   1515 #endif
   1516 			   )
   1517 				error = writedisklabel(RAIDLABELDEV(dev),
   1518 				    raidstrategy, rs->sc_dkdev.dk_label,
   1519 				    rs->sc_dkdev.dk_cpulabel);
   1520 		}
   1521 		rs->sc_flags &= ~RAIDF_LABELLING;
   1522 
   1523 		raidunlock(rs);
   1524 
   1525 		if (error)
   1526 			return (error);
   1527 		break;
   1528 	}
   1529 
   1530 	case DIOCWLABEL:
   1531 		if (*(int *) data != 0)
   1532 			rs->sc_flags |= RAIDF_WLABEL;
   1533 		else
   1534 			rs->sc_flags &= ~RAIDF_WLABEL;
   1535 		break;
   1536 
   1537 	case DIOCGDEFLABEL:
   1538 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1539 		break;
   1540 
   1541 #ifdef __HAVE_OLD_DISKLABEL
   1542 	case ODIOCGDEFLABEL:
   1543 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1544 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1545 			return ENOTTY;
   1546 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1547 		break;
   1548 #endif
   1549 
   1550 	default:
   1551 		retcode = ENOTTY;
   1552 	}
   1553 	return (retcode);
   1554 
   1555 }
   1556 
   1557 
   1558 /* raidinit -- complete the rest of the initialization for the
   1559    RAIDframe device.  */
   1560 
   1561 
   1562 static void
   1563 raidinit(raidPtr)
   1564 	RF_Raid_t *raidPtr;
   1565 {
   1566 	struct raid_softc *rs;
   1567 	int     unit;
   1568 
   1569 	unit = raidPtr->raidid;
   1570 
   1571 	rs = &raid_softc[unit];
   1572 
   1573 	/* XXX should check return code first... */
   1574 	rs->sc_flags |= RAIDF_INITED;
   1575 
   1576 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1577 
   1578 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1579 
   1580 	/* disk_attach actually creates space for the CPU disklabel, among
   1581 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1582 	 * with disklabels. */
   1583 
   1584 	disk_attach(&rs->sc_dkdev);
   1585 
   1586 	/* XXX There may be a weird interaction here between this, and
   1587 	 * protectedSectors, as used in RAIDframe.  */
   1588 
   1589 	rs->sc_size = raidPtr->totalSectors;
   1590 
   1591 }
   1592 
   1593 /* wake up the daemon & tell it to get us a spare table
   1594  * XXX
   1595  * the entries in the queues should be tagged with the raidPtr
   1596  * so that in the extremely rare case that two recons happen at once,
   1597  * we know for which device were requesting a spare table
   1598  * XXX
   1599  *
   1600  * XXX This code is not currently used. GO
   1601  */
   1602 int
   1603 rf_GetSpareTableFromDaemon(req)
   1604 	RF_SparetWait_t *req;
   1605 {
   1606 	int     retcode;
   1607 
   1608 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1609 	req->next = rf_sparet_wait_queue;
   1610 	rf_sparet_wait_queue = req;
   1611 	wakeup(&rf_sparet_wait_queue);
   1612 
   1613 	/* mpsleep unlocks the mutex */
   1614 	while (!rf_sparet_resp_queue) {
   1615 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1616 		    "raidframe getsparetable", 0);
   1617 	}
   1618 	req = rf_sparet_resp_queue;
   1619 	rf_sparet_resp_queue = req->next;
   1620 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1621 
   1622 	retcode = req->fcol;
   1623 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1624 					 * alloc'd */
   1625 	return (retcode);
   1626 }
   1627 
   1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1629  * bp & passes it down.
   1630  * any calls originating in the kernel must use non-blocking I/O
   1631  * do some extra sanity checking to return "appropriate" error values for
   1632  * certain conditions (to make some standard utilities work)
   1633  *
   1634  * Formerly known as: rf_DoAccessKernel
   1635  */
   1636 void
   1637 raidstart(raidPtr)
   1638 	RF_Raid_t *raidPtr;
   1639 {
   1640 	RF_SectorCount_t num_blocks, pb, sum;
   1641 	RF_RaidAddr_t raid_addr;
   1642 	int     retcode;
   1643 	struct partition *pp;
   1644 	daddr_t blocknum;
   1645 	int     unit;
   1646 	struct raid_softc *rs;
   1647 	int     do_async;
   1648 	struct buf *bp;
   1649 
   1650 	unit = raidPtr->raidid;
   1651 	rs = &raid_softc[unit];
   1652 
   1653 	/* quick check to see if anything has died recently */
   1654 	RF_LOCK_MUTEX(raidPtr->mutex);
   1655 	if (raidPtr->numNewFailures > 0) {
   1656 		rf_update_component_labels(raidPtr,
   1657 					   RF_NORMAL_COMPONENT_UPDATE);
   1658 		raidPtr->numNewFailures--;
   1659 	}
   1660 
   1661 	/* Check to see if we're at the limit... */
   1662 	while (raidPtr->openings > 0) {
   1663 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1664 
   1665 		/* get the next item, if any, from the queue */
   1666 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1667 			/* nothing more to do */
   1668 			return;
   1669 		}
   1670 
   1671 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1672 		 * partition.. Need to make it absolute to the underlying
   1673 		 * device.. */
   1674 
   1675 		blocknum = bp->b_blkno;
   1676 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1677 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1678 			blocknum += pp->p_offset;
   1679 		}
   1680 
   1681 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1682 			    (int) blocknum));
   1683 
   1684 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1685 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1686 
   1687 		/* *THIS* is where we adjust what block we're going to...
   1688 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1689 		raid_addr = blocknum;
   1690 
   1691 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1692 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1693 		sum = raid_addr + num_blocks + pb;
   1694 		if (1 || rf_debugKernelAccess) {
   1695 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1696 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1697 				    (int) pb, (int) bp->b_resid));
   1698 		}
   1699 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1700 		    || (sum < num_blocks) || (sum < pb)) {
   1701 			bp->b_error = ENOSPC;
   1702 			bp->b_flags |= B_ERROR;
   1703 			bp->b_resid = bp->b_bcount;
   1704 			biodone(bp);
   1705 			RF_LOCK_MUTEX(raidPtr->mutex);
   1706 			continue;
   1707 		}
   1708 		/*
   1709 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1710 		 */
   1711 
   1712 		if (bp->b_bcount & raidPtr->sectorMask) {
   1713 			bp->b_error = EINVAL;
   1714 			bp->b_flags |= B_ERROR;
   1715 			bp->b_resid = bp->b_bcount;
   1716 			biodone(bp);
   1717 			RF_LOCK_MUTEX(raidPtr->mutex);
   1718 			continue;
   1719 
   1720 		}
   1721 		db1_printf(("Calling DoAccess..\n"));
   1722 
   1723 
   1724 		RF_LOCK_MUTEX(raidPtr->mutex);
   1725 		raidPtr->openings--;
   1726 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1727 
   1728 		/*
   1729 		 * Everything is async.
   1730 		 */
   1731 		do_async = 1;
   1732 
   1733 		disk_busy(&rs->sc_dkdev);
   1734 
   1735 		/* XXX we're still at splbio() here... do we *really*
   1736 		   need to be? */
   1737 
   1738 		/* don't ever condition on bp->b_flags & B_WRITE.
   1739 		 * always condition on B_READ instead */
   1740 
   1741 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1742 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1743 				      do_async, raid_addr, num_blocks,
   1744 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1745 
   1746 		RF_LOCK_MUTEX(raidPtr->mutex);
   1747 	}
   1748 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1749 }
   1750 
   1751 
   1752 
   1753 
   1754 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1755 
   1756 int
   1757 rf_DispatchKernelIO(queue, req)
   1758 	RF_DiskQueue_t *queue;
   1759 	RF_DiskQueueData_t *req;
   1760 {
   1761 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1762 	struct buf *bp;
   1763 	struct raidbuf *raidbp = NULL;
   1764 
   1765 	req->queue = queue;
   1766 
   1767 #if DIAGNOSTIC
   1768 	if (queue->raidPtr->raidid >= numraid) {
   1769 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   1770 		    numraid);
   1771 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1772 	}
   1773 #endif
   1774 
   1775 	bp = req->bp;
   1776 #if 1
   1777 	/* XXX when there is a physical disk failure, someone is passing us a
   1778 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1779 	 * without taking a performance hit... (not sure where the real bug
   1780 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1781 
   1782 	if (bp->b_flags & B_ERROR) {
   1783 		bp->b_flags &= ~B_ERROR;
   1784 	}
   1785 	if (bp->b_error != 0) {
   1786 		bp->b_error = 0;
   1787 	}
   1788 #endif
   1789 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1790 
   1791 	/*
   1792 	 * context for raidiodone
   1793 	 */
   1794 	raidbp->rf_obp = bp;
   1795 	raidbp->req = req;
   1796 
   1797 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1798 
   1799 	switch (req->type) {
   1800 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1801 		/* XXX need to do something extra here.. */
   1802 		/* I'm leaving this in, as I've never actually seen it used,
   1803 		 * and I'd like folks to report it... GO */
   1804 		printf(("WAKEUP CALLED\n"));
   1805 		queue->numOutstanding++;
   1806 
   1807 		/* XXX need to glue the original buffer into this??  */
   1808 
   1809 		KernelWakeupFunc(&raidbp->rf_buf);
   1810 		break;
   1811 
   1812 	case RF_IO_TYPE_READ:
   1813 	case RF_IO_TYPE_WRITE:
   1814 
   1815 		if (req->tracerec) {
   1816 			RF_ETIMER_START(req->tracerec->timer);
   1817 		}
   1818 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1819 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1820 		    req->sectorOffset, req->numSector,
   1821 		    req->buf, KernelWakeupFunc, (void *) req,
   1822 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1823 
   1824 		if (rf_debugKernelAccess) {
   1825 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1826 				(long) bp->b_blkno));
   1827 		}
   1828 		queue->numOutstanding++;
   1829 		queue->last_deq_sector = req->sectorOffset;
   1830 		/* acc wouldn't have been let in if there were any pending
   1831 		 * reqs at any other priority */
   1832 		queue->curPriority = req->priority;
   1833 
   1834 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1835 			    req->type, queue->raidPtr->raidid,
   1836 			    queue->row, queue->col));
   1837 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1838 			(int) req->sectorOffset, (int) req->numSector,
   1839 			(int) (req->numSector <<
   1840 			    queue->raidPtr->logBytesPerSector),
   1841 			(int) queue->raidPtr->logBytesPerSector));
   1842 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1843 			raidbp->rf_buf.b_vp->v_numoutput++;
   1844 		}
   1845 		VOP_STRATEGY(&raidbp->rf_buf);
   1846 
   1847 		break;
   1848 
   1849 	default:
   1850 		panic("bad req->type in rf_DispatchKernelIO");
   1851 	}
   1852 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1853 
   1854 	return (0);
   1855 }
   1856 /* this is the callback function associated with a I/O invoked from
   1857    kernel code.
   1858  */
   1859 static void
   1860 KernelWakeupFunc(vbp)
   1861 	struct buf *vbp;
   1862 {
   1863 	RF_DiskQueueData_t *req = NULL;
   1864 	RF_DiskQueue_t *queue;
   1865 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1866 	struct buf *bp;
   1867 	int s;
   1868 
   1869 	s = splbio();
   1870 	db1_printf(("recovering the request queue:\n"));
   1871 	req = raidbp->req;
   1872 
   1873 	bp = raidbp->rf_obp;
   1874 
   1875 	queue = (RF_DiskQueue_t *) req->queue;
   1876 
   1877 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1878 		bp->b_flags |= B_ERROR;
   1879 		bp->b_error = raidbp->rf_buf.b_error ?
   1880 		    raidbp->rf_buf.b_error : EIO;
   1881 	}
   1882 
   1883 	/* XXX methinks this could be wrong... */
   1884 #if 1
   1885 	bp->b_resid = raidbp->rf_buf.b_resid;
   1886 #endif
   1887 
   1888 	if (req->tracerec) {
   1889 		RF_ETIMER_STOP(req->tracerec->timer);
   1890 		RF_ETIMER_EVAL(req->tracerec->timer);
   1891 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1892 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1893 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1894 		req->tracerec->num_phys_ios++;
   1895 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1896 	}
   1897 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1898 
   1899 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1900 	 * ballistic, and mark the component as hosed... */
   1901 
   1902 	if (bp->b_flags & B_ERROR) {
   1903 		/* Mark the disk as dead */
   1904 		/* but only mark it once... */
   1905 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1906 		    rf_ds_optimal) {
   1907 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1908 			       queue->raidPtr->raidid,
   1909 			       queue->raidPtr->Disks[queue->row][queue->col].devname);
   1910 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1911 			    rf_ds_failed;
   1912 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1913 			queue->raidPtr->numFailures++;
   1914 			queue->raidPtr->numNewFailures++;
   1915 		} else {	/* Disk is already dead... */
   1916 			/* printf("Disk already marked as dead!\n"); */
   1917 		}
   1918 
   1919 	}
   1920 
   1921 	pool_put(&raidframe_cbufpool, raidbp);
   1922 
   1923 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1924 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1925 
   1926 	splx(s);
   1927 }
   1928 
   1929 
   1930 
   1931 /*
   1932  * initialize a buf structure for doing an I/O in the kernel.
   1933  */
   1934 static void
   1935 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1936        logBytesPerSector, b_proc)
   1937 	struct buf *bp;
   1938 	struct vnode *b_vp;
   1939 	unsigned rw_flag;
   1940 	dev_t dev;
   1941 	RF_SectorNum_t startSect;
   1942 	RF_SectorCount_t numSect;
   1943 	caddr_t buf;
   1944 	void (*cbFunc) (struct buf *);
   1945 	void *cbArg;
   1946 	int logBytesPerSector;
   1947 	struct proc *b_proc;
   1948 {
   1949 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1950 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1951 	bp->b_bcount = numSect << logBytesPerSector;
   1952 	bp->b_bufsize = bp->b_bcount;
   1953 	bp->b_error = 0;
   1954 	bp->b_dev = dev;
   1955 	bp->b_data = buf;
   1956 	bp->b_blkno = startSect;
   1957 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1958 	if (bp->b_bcount == 0) {
   1959 		panic("bp->b_bcount is zero in InitBP!!\n");
   1960 	}
   1961 	bp->b_proc = b_proc;
   1962 	bp->b_iodone = cbFunc;
   1963 	bp->b_vp = b_vp;
   1964 
   1965 }
   1966 
   1967 static void
   1968 raidgetdefaultlabel(raidPtr, rs, lp)
   1969 	RF_Raid_t *raidPtr;
   1970 	struct raid_softc *rs;
   1971 	struct disklabel *lp;
   1972 {
   1973 	db1_printf(("Building a default label...\n"));
   1974 	memset(lp, 0, sizeof(*lp));
   1975 
   1976 	/* fabricate a label... */
   1977 	lp->d_secperunit = raidPtr->totalSectors;
   1978 	lp->d_secsize = raidPtr->bytesPerSector;
   1979 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1980 	lp->d_ntracks = 4 * raidPtr->numCol;
   1981 	lp->d_ncylinders = raidPtr->totalSectors /
   1982 		(lp->d_nsectors * lp->d_ntracks);
   1983 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1984 
   1985 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   1986 	lp->d_type = DTYPE_RAID;
   1987 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   1988 	lp->d_rpm = 3600;
   1989 	lp->d_interleave = 1;
   1990 	lp->d_flags = 0;
   1991 
   1992 	lp->d_partitions[RAW_PART].p_offset = 0;
   1993 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   1994 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   1995 	lp->d_npartitions = RAW_PART + 1;
   1996 
   1997 	lp->d_magic = DISKMAGIC;
   1998 	lp->d_magic2 = DISKMAGIC;
   1999 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2000 
   2001 }
   2002 /*
   2003  * Read the disklabel from the raid device.  If one is not present, fake one
   2004  * up.
   2005  */
   2006 static void
   2007 raidgetdisklabel(dev)
   2008 	dev_t   dev;
   2009 {
   2010 	int     unit = raidunit(dev);
   2011 	struct raid_softc *rs = &raid_softc[unit];
   2012 	char   *errstring;
   2013 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2014 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2015 	RF_Raid_t *raidPtr;
   2016 
   2017 	db1_printf(("Getting the disklabel...\n"));
   2018 
   2019 	memset(clp, 0, sizeof(*clp));
   2020 
   2021 	raidPtr = raidPtrs[unit];
   2022 
   2023 	raidgetdefaultlabel(raidPtr, rs, lp);
   2024 
   2025 	/*
   2026 	 * Call the generic disklabel extraction routine.
   2027 	 */
   2028 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2029 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2030 	if (errstring)
   2031 		raidmakedisklabel(rs);
   2032 	else {
   2033 		int     i;
   2034 		struct partition *pp;
   2035 
   2036 		/*
   2037 		 * Sanity check whether the found disklabel is valid.
   2038 		 *
   2039 		 * This is necessary since total size of the raid device
   2040 		 * may vary when an interleave is changed even though exactly
   2041 		 * same componets are used, and old disklabel may used
   2042 		 * if that is found.
   2043 		 */
   2044 		if (lp->d_secperunit != rs->sc_size)
   2045 			printf("raid%d: WARNING: %s: "
   2046 			    "total sector size in disklabel (%d) != "
   2047 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2048 			    lp->d_secperunit, (long) rs->sc_size);
   2049 		for (i = 0; i < lp->d_npartitions; i++) {
   2050 			pp = &lp->d_partitions[i];
   2051 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2052 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2053 				       "exceeds the size of raid (%ld)\n",
   2054 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2055 		}
   2056 	}
   2057 
   2058 }
   2059 /*
   2060  * Take care of things one might want to take care of in the event
   2061  * that a disklabel isn't present.
   2062  */
   2063 static void
   2064 raidmakedisklabel(rs)
   2065 	struct raid_softc *rs;
   2066 {
   2067 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2068 	db1_printf(("Making a label..\n"));
   2069 
   2070 	/*
   2071 	 * For historical reasons, if there's no disklabel present
   2072 	 * the raw partition must be marked FS_BSDFFS.
   2073 	 */
   2074 
   2075 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2076 
   2077 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2078 
   2079 	lp->d_checksum = dkcksum(lp);
   2080 }
   2081 /*
   2082  * Lookup the provided name in the filesystem.  If the file exists,
   2083  * is a valid block device, and isn't being used by anyone else,
   2084  * set *vpp to the file's vnode.
   2085  * You'll find the original of this in ccd.c
   2086  */
   2087 int
   2088 raidlookup(path, p, vpp)
   2089 	char   *path;
   2090 	struct proc *p;
   2091 	struct vnode **vpp;	/* result */
   2092 {
   2093 	struct nameidata nd;
   2094 	struct vnode *vp;
   2095 	struct vattr va;
   2096 	int     error;
   2097 
   2098 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2099 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2100 #if 0
   2101 		printf("RAIDframe: vn_open returned %d\n", error);
   2102 #endif
   2103 		return (error);
   2104 	}
   2105 	vp = nd.ni_vp;
   2106 	if (vp->v_usecount > 1) {
   2107 		VOP_UNLOCK(vp, 0);
   2108 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2109 		return (EBUSY);
   2110 	}
   2111 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2112 		VOP_UNLOCK(vp, 0);
   2113 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2114 		return (error);
   2115 	}
   2116 	/* XXX: eventually we should handle VREG, too. */
   2117 	if (va.va_type != VBLK) {
   2118 		VOP_UNLOCK(vp, 0);
   2119 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2120 		return (ENOTBLK);
   2121 	}
   2122 	VOP_UNLOCK(vp, 0);
   2123 	*vpp = vp;
   2124 	return (0);
   2125 }
   2126 /*
   2127  * Wait interruptibly for an exclusive lock.
   2128  *
   2129  * XXX
   2130  * Several drivers do this; it should be abstracted and made MP-safe.
   2131  * (Hmm... where have we seen this warning before :->  GO )
   2132  */
   2133 static int
   2134 raidlock(rs)
   2135 	struct raid_softc *rs;
   2136 {
   2137 	int     error;
   2138 
   2139 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2140 		rs->sc_flags |= RAIDF_WANTED;
   2141 		if ((error =
   2142 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2143 			return (error);
   2144 	}
   2145 	rs->sc_flags |= RAIDF_LOCKED;
   2146 	return (0);
   2147 }
   2148 /*
   2149  * Unlock and wake up any waiters.
   2150  */
   2151 static void
   2152 raidunlock(rs)
   2153 	struct raid_softc *rs;
   2154 {
   2155 
   2156 	rs->sc_flags &= ~RAIDF_LOCKED;
   2157 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2158 		rs->sc_flags &= ~RAIDF_WANTED;
   2159 		wakeup(rs);
   2160 	}
   2161 }
   2162 
   2163 
   2164 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2165 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2166 
   2167 int
   2168 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2169 {
   2170 	RF_ComponentLabel_t clabel;
   2171 	raidread_component_label(dev, b_vp, &clabel);
   2172 	clabel.mod_counter = mod_counter;
   2173 	clabel.clean = RF_RAID_CLEAN;
   2174 	raidwrite_component_label(dev, b_vp, &clabel);
   2175 	return(0);
   2176 }
   2177 
   2178 
   2179 int
   2180 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2181 {
   2182 	RF_ComponentLabel_t clabel;
   2183 	raidread_component_label(dev, b_vp, &clabel);
   2184 	clabel.mod_counter = mod_counter;
   2185 	clabel.clean = RF_RAID_DIRTY;
   2186 	raidwrite_component_label(dev, b_vp, &clabel);
   2187 	return(0);
   2188 }
   2189 
   2190 /* ARGSUSED */
   2191 int
   2192 raidread_component_label(dev, b_vp, clabel)
   2193 	dev_t dev;
   2194 	struct vnode *b_vp;
   2195 	RF_ComponentLabel_t *clabel;
   2196 {
   2197 	struct buf *bp;
   2198 	const struct bdevsw *bdev;
   2199 	int error;
   2200 
   2201 	/* XXX should probably ensure that we don't try to do this if
   2202 	   someone has changed rf_protected_sectors. */
   2203 
   2204 	if (b_vp == NULL) {
   2205 		/* For whatever reason, this component is not valid.
   2206 		   Don't try to read a component label from it. */
   2207 		return(EINVAL);
   2208 	}
   2209 
   2210 	/* get a block of the appropriate size... */
   2211 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2212 	bp->b_dev = dev;
   2213 
   2214 	/* get our ducks in a row for the read */
   2215 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2216 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2217 	bp->b_flags |= B_READ;
   2218  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2219 
   2220 	bdev = bdevsw_lookup(bp->b_dev);
   2221 	if (bdev == NULL)
   2222 		return (ENXIO);
   2223 	(*bdev->d_strategy)(bp);
   2224 
   2225 	error = biowait(bp);
   2226 
   2227 	if (!error) {
   2228 		memcpy(clabel, bp->b_data,
   2229 		       sizeof(RF_ComponentLabel_t));
   2230 #if 0
   2231 		rf_print_component_label( clabel );
   2232 #endif
   2233         } else {
   2234 #if 0
   2235 		printf("Failed to read RAID component label!\n");
   2236 #endif
   2237 	}
   2238 
   2239 	brelse(bp);
   2240 	return(error);
   2241 }
   2242 /* ARGSUSED */
   2243 int
   2244 raidwrite_component_label(dev, b_vp, clabel)
   2245 	dev_t dev;
   2246 	struct vnode *b_vp;
   2247 	RF_ComponentLabel_t *clabel;
   2248 {
   2249 	struct buf *bp;
   2250 	const struct bdevsw *bdev;
   2251 	int error;
   2252 
   2253 	/* get a block of the appropriate size... */
   2254 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2255 	bp->b_dev = dev;
   2256 
   2257 	/* get our ducks in a row for the write */
   2258 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2259 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2260 	bp->b_flags |= B_WRITE;
   2261  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2262 
   2263 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2264 
   2265 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2266 
   2267 	bdev = bdevsw_lookup(bp->b_dev);
   2268 	if (bdev == NULL)
   2269 		return (ENXIO);
   2270 	(*bdev->d_strategy)(bp);
   2271 	error = biowait(bp);
   2272 	brelse(bp);
   2273 	if (error) {
   2274 #if 1
   2275 		printf("Failed to write RAID component info!\n");
   2276 #endif
   2277 	}
   2278 
   2279 	return(error);
   2280 }
   2281 
   2282 void
   2283 rf_markalldirty(raidPtr)
   2284 	RF_Raid_t *raidPtr;
   2285 {
   2286 	RF_ComponentLabel_t clabel;
   2287 	int r,c;
   2288 
   2289 	raidPtr->mod_counter++;
   2290 	for (r = 0; r < raidPtr->numRow; r++) {
   2291 		for (c = 0; c < raidPtr->numCol; c++) {
   2292 			/* we don't want to touch (at all) a disk that has
   2293 			   failed */
   2294 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2295 				raidread_component_label(
   2296 					raidPtr->Disks[r][c].dev,
   2297 					raidPtr->raid_cinfo[r][c].ci_vp,
   2298 					&clabel);
   2299 				if (clabel.status == rf_ds_spared) {
   2300 					/* XXX do something special...
   2301 					 but whatever you do, don't
   2302 					 try to access it!! */
   2303 				} else {
   2304 #if 0
   2305 				clabel.status =
   2306 					raidPtr->Disks[r][c].status;
   2307 				raidwrite_component_label(
   2308 					raidPtr->Disks[r][c].dev,
   2309 					raidPtr->raid_cinfo[r][c].ci_vp,
   2310 					&clabel);
   2311 #endif
   2312 				raidmarkdirty(
   2313 				       raidPtr->Disks[r][c].dev,
   2314 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2315 				       raidPtr->mod_counter);
   2316 				}
   2317 			}
   2318 		}
   2319 	}
   2320 	/* printf("Component labels marked dirty.\n"); */
   2321 #if 0
   2322 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2323 		sparecol = raidPtr->numCol + c;
   2324 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2325 			/*
   2326 
   2327 			   XXX this is where we get fancy and map this spare
   2328 			   into it's correct spot in the array.
   2329 
   2330 			 */
   2331 			/*
   2332 
   2333 			   we claim this disk is "optimal" if it's
   2334 			   rf_ds_used_spare, as that means it should be
   2335 			   directly substitutable for the disk it replaced.
   2336 			   We note that too...
   2337 
   2338 			 */
   2339 
   2340 			for(i=0;i<raidPtr->numRow;i++) {
   2341 				for(j=0;j<raidPtr->numCol;j++) {
   2342 					if ((raidPtr->Disks[i][j].spareRow ==
   2343 					     r) &&
   2344 					    (raidPtr->Disks[i][j].spareCol ==
   2345 					     sparecol)) {
   2346 						srow = r;
   2347 						scol = sparecol;
   2348 						break;
   2349 					}
   2350 				}
   2351 			}
   2352 
   2353 			raidread_component_label(
   2354 				      raidPtr->Disks[r][sparecol].dev,
   2355 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2356 				      &clabel);
   2357 			/* make sure status is noted */
   2358 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2359 			clabel.mod_counter = raidPtr->mod_counter;
   2360 			clabel.serial_number = raidPtr->serial_number;
   2361 			clabel.row = srow;
   2362 			clabel.column = scol;
   2363 			clabel.num_rows = raidPtr->numRow;
   2364 			clabel.num_columns = raidPtr->numCol;
   2365 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2366 			clabel.status = rf_ds_optimal;
   2367 			raidwrite_component_label(
   2368 				      raidPtr->Disks[r][sparecol].dev,
   2369 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2370 				      &clabel);
   2371 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2372 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2373 		}
   2374 	}
   2375 
   2376 #endif
   2377 }
   2378 
   2379 
   2380 void
   2381 rf_update_component_labels(raidPtr, final)
   2382 	RF_Raid_t *raidPtr;
   2383 	int final;
   2384 {
   2385 	RF_ComponentLabel_t clabel;
   2386 	int sparecol;
   2387 	int r,c;
   2388 	int i,j;
   2389 	int srow, scol;
   2390 
   2391 	srow = -1;
   2392 	scol = -1;
   2393 
   2394 	/* XXX should do extra checks to make sure things really are clean,
   2395 	   rather than blindly setting the clean bit... */
   2396 
   2397 	raidPtr->mod_counter++;
   2398 
   2399 	for (r = 0; r < raidPtr->numRow; r++) {
   2400 		for (c = 0; c < raidPtr->numCol; c++) {
   2401 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2402 				raidread_component_label(
   2403 					raidPtr->Disks[r][c].dev,
   2404 					raidPtr->raid_cinfo[r][c].ci_vp,
   2405 					&clabel);
   2406 				/* make sure status is noted */
   2407 				clabel.status = rf_ds_optimal;
   2408 				/* bump the counter */
   2409 				clabel.mod_counter = raidPtr->mod_counter;
   2410 
   2411 				raidwrite_component_label(
   2412 					raidPtr->Disks[r][c].dev,
   2413 					raidPtr->raid_cinfo[r][c].ci_vp,
   2414 					&clabel);
   2415 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2416 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2417 						raidmarkclean(
   2418 							      raidPtr->Disks[r][c].dev,
   2419 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2420 							      raidPtr->mod_counter);
   2421 					}
   2422 				}
   2423 			}
   2424 			/* else we don't touch it.. */
   2425 		}
   2426 	}
   2427 
   2428 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2429 		sparecol = raidPtr->numCol + c;
   2430 		/* Need to ensure that the reconstruct actually completed! */
   2431 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2432 			/*
   2433 
   2434 			   we claim this disk is "optimal" if it's
   2435 			   rf_ds_used_spare, as that means it should be
   2436 			   directly substitutable for the disk it replaced.
   2437 			   We note that too...
   2438 
   2439 			 */
   2440 
   2441 			for(i=0;i<raidPtr->numRow;i++) {
   2442 				for(j=0;j<raidPtr->numCol;j++) {
   2443 					if ((raidPtr->Disks[i][j].spareRow ==
   2444 					     0) &&
   2445 					    (raidPtr->Disks[i][j].spareCol ==
   2446 					     sparecol)) {
   2447 						srow = i;
   2448 						scol = j;
   2449 						break;
   2450 					}
   2451 				}
   2452 			}
   2453 
   2454 			/* XXX shouldn't *really* need this... */
   2455 			raidread_component_label(
   2456 				      raidPtr->Disks[0][sparecol].dev,
   2457 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2458 				      &clabel);
   2459 			/* make sure status is noted */
   2460 
   2461 			raid_init_component_label(raidPtr, &clabel);
   2462 
   2463 			clabel.mod_counter = raidPtr->mod_counter;
   2464 			clabel.row = srow;
   2465 			clabel.column = scol;
   2466 			clabel.status = rf_ds_optimal;
   2467 
   2468 			raidwrite_component_label(
   2469 				      raidPtr->Disks[0][sparecol].dev,
   2470 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2471 				      &clabel);
   2472 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2473 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2474 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2475 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2476 						       raidPtr->mod_counter);
   2477 				}
   2478 			}
   2479 		}
   2480 	}
   2481 	/* 	printf("Component labels updated\n"); */
   2482 }
   2483 
   2484 void
   2485 rf_close_component(raidPtr, vp, auto_configured)
   2486 	RF_Raid_t *raidPtr;
   2487 	struct vnode *vp;
   2488 	int auto_configured;
   2489 {
   2490 	struct proc *p;
   2491 
   2492 	p = raidPtr->engine_thread;
   2493 
   2494 	if (vp != NULL) {
   2495 		if (auto_configured == 1) {
   2496 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2497 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2498 			vput(vp);
   2499 
   2500 		} else {
   2501 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2502 		}
   2503 	} else {
   2504 #if 0
   2505 		printf("vnode was NULL\n");
   2506 #endif
   2507 	}
   2508 }
   2509 
   2510 
   2511 void
   2512 rf_UnconfigureVnodes(raidPtr)
   2513 	RF_Raid_t *raidPtr;
   2514 {
   2515 	int r,c;
   2516 	struct proc *p;
   2517 	struct vnode *vp;
   2518 	int acd;
   2519 
   2520 
   2521 	/* We take this opportunity to close the vnodes like we should.. */
   2522 
   2523 	p = raidPtr->engine_thread;
   2524 
   2525 	for (r = 0; r < raidPtr->numRow; r++) {
   2526 		for (c = 0; c < raidPtr->numCol; c++) {
   2527 #if 0
   2528 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2529 			       raidPtr->raidid, r, c);
   2530 #endif
   2531 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2532 			acd = raidPtr->Disks[r][c].auto_configured;
   2533 			rf_close_component(raidPtr, vp, acd);
   2534 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2535 			raidPtr->Disks[r][c].auto_configured = 0;
   2536 		}
   2537 	}
   2538 	for (r = 0; r < raidPtr->numSpare; r++) {
   2539 #if 0
   2540 		printf("raid%d: Closing vnode for spare: %d\n",
   2541 		       raidPtr->raidid, r);
   2542 #endif
   2543 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2544 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2545 		rf_close_component(raidPtr, vp, acd);
   2546 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2547 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2548 	}
   2549 }
   2550 
   2551 
   2552 void
   2553 rf_ReconThread(req)
   2554 	struct rf_recon_req *req;
   2555 {
   2556 	int     s;
   2557 	RF_Raid_t *raidPtr;
   2558 
   2559 	s = splbio();
   2560 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2561 	raidPtr->recon_in_progress = 1;
   2562 
   2563 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2564 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2565 
   2566 	/* XXX get rid of this! we don't need it at all.. */
   2567 	RF_Free(req, sizeof(*req));
   2568 
   2569 	raidPtr->recon_in_progress = 0;
   2570 	splx(s);
   2571 
   2572 	/* That's all... */
   2573 	kthread_exit(0);        /* does not return */
   2574 }
   2575 
   2576 void
   2577 rf_RewriteParityThread(raidPtr)
   2578 	RF_Raid_t *raidPtr;
   2579 {
   2580 	int retcode;
   2581 	int s;
   2582 
   2583 	raidPtr->parity_rewrite_in_progress = 1;
   2584 	s = splbio();
   2585 	retcode = rf_RewriteParity(raidPtr);
   2586 	splx(s);
   2587 	if (retcode) {
   2588 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2589 	} else {
   2590 		/* set the clean bit!  If we shutdown correctly,
   2591 		   the clean bit on each component label will get
   2592 		   set */
   2593 		raidPtr->parity_good = RF_RAID_CLEAN;
   2594 	}
   2595 	raidPtr->parity_rewrite_in_progress = 0;
   2596 
   2597 	/* Anyone waiting for us to stop?  If so, inform them... */
   2598 	if (raidPtr->waitShutdown) {
   2599 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2600 	}
   2601 
   2602 	/* That's all... */
   2603 	kthread_exit(0);        /* does not return */
   2604 }
   2605 
   2606 
   2607 void
   2608 rf_CopybackThread(raidPtr)
   2609 	RF_Raid_t *raidPtr;
   2610 {
   2611 	int s;
   2612 
   2613 	raidPtr->copyback_in_progress = 1;
   2614 	s = splbio();
   2615 	rf_CopybackReconstructedData(raidPtr);
   2616 	splx(s);
   2617 	raidPtr->copyback_in_progress = 0;
   2618 
   2619 	/* That's all... */
   2620 	kthread_exit(0);        /* does not return */
   2621 }
   2622 
   2623 
   2624 void
   2625 rf_ReconstructInPlaceThread(req)
   2626 	struct rf_recon_req *req;
   2627 {
   2628 	int retcode;
   2629 	int s;
   2630 	RF_Raid_t *raidPtr;
   2631 
   2632 	s = splbio();
   2633 	raidPtr = req->raidPtr;
   2634 	raidPtr->recon_in_progress = 1;
   2635 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2636 	RF_Free(req, sizeof(*req));
   2637 	raidPtr->recon_in_progress = 0;
   2638 	splx(s);
   2639 
   2640 	/* That's all... */
   2641 	kthread_exit(0);        /* does not return */
   2642 }
   2643 
   2644 RF_AutoConfig_t *
   2645 rf_find_raid_components()
   2646 {
   2647 	struct vnode *vp;
   2648 	struct disklabel label;
   2649 	struct device *dv;
   2650 	dev_t dev;
   2651 	int bmajor;
   2652 	int error;
   2653 	int i;
   2654 	int good_one;
   2655 	RF_ComponentLabel_t *clabel;
   2656 	RF_AutoConfig_t *ac_list;
   2657 	RF_AutoConfig_t *ac;
   2658 
   2659 
   2660 	/* initialize the AutoConfig list */
   2661 	ac_list = NULL;
   2662 
   2663 	/* we begin by trolling through *all* the devices on the system */
   2664 
   2665 	for (dv = alldevs.tqh_first; dv != NULL;
   2666 	     dv = dv->dv_list.tqe_next) {
   2667 
   2668 		/* we are only interested in disks... */
   2669 		if (dv->dv_class != DV_DISK)
   2670 			continue;
   2671 
   2672 		/* we don't care about floppies... */
   2673 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2674 			continue;
   2675 		}
   2676 
   2677 		/* we don't care about CD's... */
   2678 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
   2679 			continue;
   2680 		}
   2681 
   2682 		/* hdfd is the Atari/Hades floppy driver */
   2683 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2684 			continue;
   2685 		}
   2686 		/* fdisa is the Atari/Milan floppy driver */
   2687 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
   2688 			continue;
   2689 		}
   2690 
   2691 		/* need to find the device_name_to_block_device_major stuff */
   2692 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2693 
   2694 		/* get a vnode for the raw partition of this disk */
   2695 
   2696 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2697 		if (bdevvp(dev, &vp))
   2698 			panic("RAID can't alloc vnode");
   2699 
   2700 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2701 
   2702 		if (error) {
   2703 			/* "Who cares."  Continue looking
   2704 			   for something that exists*/
   2705 			vput(vp);
   2706 			continue;
   2707 		}
   2708 
   2709 		/* Ok, the disk exists.  Go get the disklabel. */
   2710 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2711 				  FREAD, NOCRED, 0);
   2712 		if (error) {
   2713 			/*
   2714 			 * XXX can't happen - open() would
   2715 			 * have errored out (or faked up one)
   2716 			 */
   2717 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2718 			       dv->dv_xname, 'a' + RAW_PART, error);
   2719 		}
   2720 
   2721 		/* don't need this any more.  We'll allocate it again
   2722 		   a little later if we really do... */
   2723 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2724 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2725 		vput(vp);
   2726 
   2727 		for (i=0; i < label.d_npartitions; i++) {
   2728 			/* We only support partitions marked as RAID */
   2729 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2730 				continue;
   2731 
   2732 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2733 			if (bdevvp(dev, &vp))
   2734 				panic("RAID can't alloc vnode");
   2735 
   2736 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2737 			if (error) {
   2738 				/* Whatever... */
   2739 				vput(vp);
   2740 				continue;
   2741 			}
   2742 
   2743 			good_one = 0;
   2744 
   2745 			clabel = (RF_ComponentLabel_t *)
   2746 				malloc(sizeof(RF_ComponentLabel_t),
   2747 				       M_RAIDFRAME, M_NOWAIT);
   2748 			if (clabel == NULL) {
   2749 				/* XXX CLEANUP HERE */
   2750 				printf("RAID auto config: out of memory!\n");
   2751 				return(NULL); /* XXX probably should panic? */
   2752 			}
   2753 
   2754 			if (!raidread_component_label(dev, vp, clabel)) {
   2755 				/* Got the label.  Does it look reasonable? */
   2756 				if (rf_reasonable_label(clabel) &&
   2757 				    (clabel->partitionSize <=
   2758 				     label.d_partitions[i].p_size)) {
   2759 #if DEBUG
   2760 					printf("Component on: %s%c: %d\n",
   2761 					       dv->dv_xname, 'a'+i,
   2762 					       label.d_partitions[i].p_size);
   2763 					rf_print_component_label(clabel);
   2764 #endif
   2765 					/* if it's reasonable, add it,
   2766 					   else ignore it. */
   2767 					ac = (RF_AutoConfig_t *)
   2768 						malloc(sizeof(RF_AutoConfig_t),
   2769 						       M_RAIDFRAME,
   2770 						       M_NOWAIT);
   2771 					if (ac == NULL) {
   2772 						/* XXX should panic?? */
   2773 						return(NULL);
   2774 					}
   2775 
   2776 					sprintf(ac->devname, "%s%c",
   2777 						dv->dv_xname, 'a'+i);
   2778 					ac->dev = dev;
   2779 					ac->vp = vp;
   2780 					ac->clabel = clabel;
   2781 					ac->next = ac_list;
   2782 					ac_list = ac;
   2783 					good_one = 1;
   2784 				}
   2785 			}
   2786 			if (!good_one) {
   2787 				/* cleanup */
   2788 				free(clabel, M_RAIDFRAME);
   2789 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2790 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2791 				vput(vp);
   2792 			}
   2793 		}
   2794 	}
   2795 	return(ac_list);
   2796 }
   2797 
   2798 static int
   2799 rf_reasonable_label(clabel)
   2800 	RF_ComponentLabel_t *clabel;
   2801 {
   2802 
   2803 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2804 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2805 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2806 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2807 	    clabel->row >=0 &&
   2808 	    clabel->column >= 0 &&
   2809 	    clabel->num_rows > 0 &&
   2810 	    clabel->num_columns > 0 &&
   2811 	    clabel->row < clabel->num_rows &&
   2812 	    clabel->column < clabel->num_columns &&
   2813 	    clabel->blockSize > 0 &&
   2814 	    clabel->numBlocks > 0) {
   2815 		/* label looks reasonable enough... */
   2816 		return(1);
   2817 	}
   2818 	return(0);
   2819 }
   2820 
   2821 
#if 0
/*
 * Print the contents of a component label, one group of related
 * fields per line.  Currently compiled out.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_auto = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	/* position and geometry */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);

	/* identity of the set */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", is_clean, clabel->status);

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);

	/* autoconfiguration settings */
	printf("   Autoconfig: %s\n", is_auto);
	printf("   Contains root partition: %s\n", is_root);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   2850 
   2851 RF_ConfigSet_t *
   2852 rf_create_auto_sets(ac_list)
   2853 	RF_AutoConfig_t *ac_list;
   2854 {
   2855 	RF_AutoConfig_t *ac;
   2856 	RF_ConfigSet_t *config_sets;
   2857 	RF_ConfigSet_t *cset;
   2858 	RF_AutoConfig_t *ac_next;
   2859 
   2860 
   2861 	config_sets = NULL;
   2862 
   2863 	/* Go through the AutoConfig list, and figure out which components
   2864 	   belong to what sets.  */
   2865 	ac = ac_list;
   2866 	while(ac!=NULL) {
   2867 		/* we're going to putz with ac->next, so save it here
   2868 		   for use at the end of the loop */
   2869 		ac_next = ac->next;
   2870 
   2871 		if (config_sets == NULL) {
   2872 			/* will need at least this one... */
   2873 			config_sets = (RF_ConfigSet_t *)
   2874 				malloc(sizeof(RF_ConfigSet_t),
   2875 				       M_RAIDFRAME, M_NOWAIT);
   2876 			if (config_sets == NULL) {
   2877 				panic("rf_create_auto_sets: No memory!\n");
   2878 			}
   2879 			/* this one is easy :) */
   2880 			config_sets->ac = ac;
   2881 			config_sets->next = NULL;
   2882 			config_sets->rootable = 0;
   2883 			ac->next = NULL;
   2884 		} else {
   2885 			/* which set does this component fit into? */
   2886 			cset = config_sets;
   2887 			while(cset!=NULL) {
   2888 				if (rf_does_it_fit(cset, ac)) {
   2889 					/* looks like it matches... */
   2890 					ac->next = cset->ac;
   2891 					cset->ac = ac;
   2892 					break;
   2893 				}
   2894 				cset = cset->next;
   2895 			}
   2896 			if (cset==NULL) {
   2897 				/* didn't find a match above... new set..*/
   2898 				cset = (RF_ConfigSet_t *)
   2899 					malloc(sizeof(RF_ConfigSet_t),
   2900 					       M_RAIDFRAME, M_NOWAIT);
   2901 				if (cset == NULL) {
   2902 					panic("rf_create_auto_sets: No memory!\n");
   2903 				}
   2904 				cset->ac = ac;
   2905 				ac->next = NULL;
   2906 				cset->next = config_sets;
   2907 				cset->rootable = 0;
   2908 				config_sets = cset;
   2909 			}
   2910 		}
   2911 		ac = ac_next;
   2912 	}
   2913 
   2914 
   2915 	return(config_sets);
   2916 }
   2917 
   2918 static int
   2919 rf_does_it_fit(cset, ac)
   2920 	RF_ConfigSet_t *cset;
   2921 	RF_AutoConfig_t *ac;
   2922 {
   2923 	RF_ComponentLabel_t *clabel1, *clabel2;
   2924 
   2925 	/* If this one matches the *first* one in the set, that's good
   2926 	   enough, since the other members of the set would have been
   2927 	   through here too... */
   2928 	/* note that we are not checking partitionSize here..
   2929 
   2930 	   Note that we are also not checking the mod_counters here.
   2931 	   If everything else matches execpt the mod_counter, that's
   2932 	   good enough for this test.  We will deal with the mod_counters
   2933 	   a little later in the autoconfiguration process.
   2934 
   2935 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2936 
   2937 	   The reason we don't check for this is that failed disks
   2938 	   will have lower modification counts.  If those disks are
   2939 	   not added to the set they used to belong to, then they will
   2940 	   form their own set, which may result in 2 different sets,
   2941 	   for example, competing to be configured at raid0, and
   2942 	   perhaps competing to be the root filesystem set.  If the
   2943 	   wrong ones get configured, or both attempt to become /,
   2944 	   weird behaviour and or serious lossage will occur.  Thus we
   2945 	   need to bring them into the fold here, and kick them out at
   2946 	   a later point.
   2947 
   2948 	*/
   2949 
   2950 	clabel1 = cset->ac->clabel;
   2951 	clabel2 = ac->clabel;
   2952 	if ((clabel1->version == clabel2->version) &&
   2953 	    (clabel1->serial_number == clabel2->serial_number) &&
   2954 	    (clabel1->num_rows == clabel2->num_rows) &&
   2955 	    (clabel1->num_columns == clabel2->num_columns) &&
   2956 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2957 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2958 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2959 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2960 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2961 	    (clabel1->blockSize == clabel2->blockSize) &&
   2962 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2963 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2964 	    (clabel1->root_partition == clabel2->root_partition) &&
   2965 	    (clabel1->last_unit == clabel2->last_unit) &&
   2966 	    (clabel1->config_order == clabel2->config_order)) {
   2967 		/* if it get's here, it almost *has* to be a match */
   2968 	} else {
   2969 		/* it's not consistent with somebody in the set..
   2970 		   punt */
   2971 		return(0);
   2972 	}
   2973 	/* all was fine.. it must fit... */
   2974 	return(1);
   2975 }
   2976 
   2977 int
   2978 rf_have_enough_components(cset)
   2979 	RF_ConfigSet_t *cset;
   2980 {
   2981 	RF_AutoConfig_t *ac;
   2982 	RF_AutoConfig_t *auto_config;
   2983 	RF_ComponentLabel_t *clabel;
   2984 	int r,c;
   2985 	int num_rows;
   2986 	int num_cols;
   2987 	int num_missing;
   2988 	int mod_counter;
   2989 	int mod_counter_found;
   2990 	int even_pair_failed;
   2991 	char parity_type;
   2992 
   2993 
   2994 	/* check to see that we have enough 'live' components
   2995 	   of this set.  If so, we can configure it if necessary */
   2996 
   2997 	num_rows = cset->ac->clabel->num_rows;
   2998 	num_cols = cset->ac->clabel->num_columns;
   2999 	parity_type = cset->ac->clabel->parityConfig;
   3000 
   3001 	/* XXX Check for duplicate components!?!?!? */
   3002 
   3003 	/* Determine what the mod_counter is supposed to be for this set. */
   3004 
   3005 	mod_counter_found = 0;
   3006 	mod_counter = 0;
   3007 	ac = cset->ac;
   3008 	while(ac!=NULL) {
   3009 		if (mod_counter_found==0) {
   3010 			mod_counter = ac->clabel->mod_counter;
   3011 			mod_counter_found = 1;
   3012 		} else {
   3013 			if (ac->clabel->mod_counter > mod_counter) {
   3014 				mod_counter = ac->clabel->mod_counter;
   3015 			}
   3016 		}
   3017 		ac = ac->next;
   3018 	}
   3019 
   3020 	num_missing = 0;
   3021 	auto_config = cset->ac;
   3022 
   3023 	for(r=0; r<num_rows; r++) {
   3024 		even_pair_failed = 0;
   3025 		for(c=0; c<num_cols; c++) {
   3026 			ac = auto_config;
   3027 			while(ac!=NULL) {
   3028 				if ((ac->clabel->row == r) &&
   3029 				    (ac->clabel->column == c) &&
   3030 				    (ac->clabel->mod_counter == mod_counter)) {
   3031 					/* it's this one... */
   3032 #if DEBUG
   3033 					printf("Found: %s at %d,%d\n",
   3034 					       ac->devname,r,c);
   3035 #endif
   3036 					break;
   3037 				}
   3038 				ac=ac->next;
   3039 			}
   3040 			if (ac==NULL) {
   3041 				/* Didn't find one here! */
   3042 				/* special case for RAID 1, especially
   3043 				   where there are more than 2
   3044 				   components (where RAIDframe treats
   3045 				   things a little differently :( ) */
   3046 				if (parity_type == '1') {
   3047 					if (c%2 == 0) { /* even component */
   3048 						even_pair_failed = 1;
   3049 					} else { /* odd component.  If
   3050                                                     we're failed, and
   3051                                                     so is the even
   3052                                                     component, it's
   3053                                                     "Good Night, Charlie" */
   3054 						if (even_pair_failed == 1) {
   3055 							return(0);
   3056 						}
   3057 					}
   3058 				} else {
   3059 					/* normal accounting */
   3060 					num_missing++;
   3061 				}
   3062 			}
   3063 			if ((parity_type == '1') && (c%2 == 1)) {
   3064 				/* Just did an even component, and we didn't
   3065 				   bail.. reset the even_pair_failed flag,
   3066 				   and go on to the next component.... */
   3067 				even_pair_failed = 0;
   3068 			}
   3069 		}
   3070 	}
   3071 
   3072 	clabel = cset->ac->clabel;
   3073 
   3074 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3075 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3076 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3077 		/* XXX this needs to be made *much* more general */
   3078 		/* Too many failures */
   3079 		return(0);
   3080 	}
   3081 	/* otherwise, all is well, and we've got enough to take a kick
   3082 	   at autoconfiguring this set */
   3083 	return(1);
   3084 }
   3085 
   3086 void
   3087 rf_create_configuration(ac,config,raidPtr)
   3088 	RF_AutoConfig_t *ac;
   3089 	RF_Config_t *config;
   3090 	RF_Raid_t *raidPtr;
   3091 {
   3092 	RF_ComponentLabel_t *clabel;
   3093 	int i;
   3094 
   3095 	clabel = ac->clabel;
   3096 
   3097 	/* 1. Fill in the common stuff */
   3098 	config->numRow = clabel->num_rows;
   3099 	config->numCol = clabel->num_columns;
   3100 	config->numSpare = 0; /* XXX should this be set here? */
   3101 	config->sectPerSU = clabel->sectPerSU;
   3102 	config->SUsPerPU = clabel->SUsPerPU;
   3103 	config->SUsPerRU = clabel->SUsPerRU;
   3104 	config->parityConfig = clabel->parityConfig;
   3105 	/* XXX... */
   3106 	strcpy(config->diskQueueType,"fifo");
   3107 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3108 	config->layoutSpecificSize = 0; /* XXX ?? */
   3109 
   3110 	while(ac!=NULL) {
   3111 		/* row/col values will be in range due to the checks
   3112 		   in reasonable_label() */
   3113 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3114 		       ac->devname);
   3115 		ac = ac->next;
   3116 	}
   3117 
   3118 	for(i=0;i<RF_MAXDBGV;i++) {
   3119 		config->debugVars[i][0] = NULL;
   3120 	}
   3121 }
   3122 
   3123 int
   3124 rf_set_autoconfig(raidPtr, new_value)
   3125 	RF_Raid_t *raidPtr;
   3126 	int new_value;
   3127 {
   3128 	RF_ComponentLabel_t clabel;
   3129 	struct vnode *vp;
   3130 	dev_t dev;
   3131 	int row, column;
   3132 
   3133 	raidPtr->autoconfigure = new_value;
   3134 	for(row=0; row<raidPtr->numRow; row++) {
   3135 		for(column=0; column<raidPtr->numCol; column++) {
   3136 			if (raidPtr->Disks[row][column].status ==
   3137 			    rf_ds_optimal) {
   3138 				dev = raidPtr->Disks[row][column].dev;
   3139 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3140 				raidread_component_label(dev, vp, &clabel);
   3141 				clabel.autoconfigure = new_value;
   3142 				raidwrite_component_label(dev, vp, &clabel);
   3143 			}
   3144 		}
   3145 	}
   3146 	return(new_value);
   3147 }
   3148 
   3149 int
   3150 rf_set_rootpartition(raidPtr, new_value)
   3151 	RF_Raid_t *raidPtr;
   3152 	int new_value;
   3153 {
   3154 	RF_ComponentLabel_t clabel;
   3155 	struct vnode *vp;
   3156 	dev_t dev;
   3157 	int row, column;
   3158 
   3159 	raidPtr->root_partition = new_value;
   3160 	for(row=0; row<raidPtr->numRow; row++) {
   3161 		for(column=0; column<raidPtr->numCol; column++) {
   3162 			if (raidPtr->Disks[row][column].status ==
   3163 			    rf_ds_optimal) {
   3164 				dev = raidPtr->Disks[row][column].dev;
   3165 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3166 				raidread_component_label(dev, vp, &clabel);
   3167 				clabel.root_partition = new_value;
   3168 				raidwrite_component_label(dev, vp, &clabel);
   3169 			}
   3170 		}
   3171 	}
   3172 	return(new_value);
   3173 }
   3174 
   3175 void
   3176 rf_release_all_vps(cset)
   3177 	RF_ConfigSet_t *cset;
   3178 {
   3179 	RF_AutoConfig_t *ac;
   3180 
   3181 	ac = cset->ac;
   3182 	while(ac!=NULL) {
   3183 		/* Close the vp, and give it back */
   3184 		if (ac->vp) {
   3185 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3186 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3187 			vput(ac->vp);
   3188 			ac->vp = NULL;
   3189 		}
   3190 		ac = ac->next;
   3191 	}
   3192 }
   3193 
   3194 
   3195 void
   3196 rf_cleanup_config_set(cset)
   3197 	RF_ConfigSet_t *cset;
   3198 {
   3199 	RF_AutoConfig_t *ac;
   3200 	RF_AutoConfig_t *next_ac;
   3201 
   3202 	ac = cset->ac;
   3203 	while(ac!=NULL) {
   3204 		next_ac = ac->next;
   3205 		/* nuke the label */
   3206 		free(ac->clabel, M_RAIDFRAME);
   3207 		/* cleanup the config structure */
   3208 		free(ac, M_RAIDFRAME);
   3209 		/* "next.." */
   3210 		ac = next_ac;
   3211 	}
   3212 	/* and, finally, nuke the config set */
   3213 	free(cset, M_RAIDFRAME);
   3214 }
   3215 
   3216 
   3217 void
   3218 raid_init_component_label(raidPtr, clabel)
   3219 	RF_Raid_t *raidPtr;
   3220 	RF_ComponentLabel_t *clabel;
   3221 {
   3222 	/* current version number */
   3223 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3224 	clabel->serial_number = raidPtr->serial_number;
   3225 	clabel->mod_counter = raidPtr->mod_counter;
   3226 	clabel->num_rows = raidPtr->numRow;
   3227 	clabel->num_columns = raidPtr->numCol;
   3228 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3229 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3230 
   3231 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3232 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3233 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3234 
   3235 	clabel->blockSize = raidPtr->bytesPerSector;
   3236 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3237 
   3238 	/* XXX not portable */
   3239 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3240 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3241 	clabel->autoconfigure = raidPtr->autoconfigure;
   3242 	clabel->root_partition = raidPtr->root_partition;
   3243 	clabel->last_unit = raidPtr->raidid;
   3244 	clabel->config_order = raidPtr->config_order;
   3245 }
   3246 
   3247 int
   3248 rf_auto_config_set(cset,unit)
   3249 	RF_ConfigSet_t *cset;
   3250 	int *unit;
   3251 {
   3252 	RF_Raid_t *raidPtr;
   3253 	RF_Config_t *config;
   3254 	int raidID;
   3255 	int retcode;
   3256 
   3257 #if DEBUG
   3258 	printf("RAID autoconfigure\n");
   3259 #endif
   3260 
   3261 	retcode = 0;
   3262 	*unit = -1;
   3263 
   3264 	/* 1. Create a config structure */
   3265 
   3266 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3267 				       M_RAIDFRAME,
   3268 				       M_NOWAIT);
   3269 	if (config==NULL) {
   3270 		printf("Out of mem!?!?\n");
   3271 				/* XXX do something more intelligent here. */
   3272 		return(1);
   3273 	}
   3274 
   3275 	memset(config, 0, sizeof(RF_Config_t));
   3276 
   3277 	/*
   3278 	   2. Figure out what RAID ID this one is supposed to live at
   3279 	   See if we can get the same RAID dev that it was configured
   3280 	   on last time..
   3281 	*/
   3282 
   3283 	raidID = cset->ac->clabel->last_unit;
   3284 	if ((raidID < 0) || (raidID >= numraid)) {
   3285 		/* let's not wander off into lala land. */
   3286 		raidID = numraid - 1;
   3287 	}
   3288 	if (raidPtrs[raidID]->valid != 0) {
   3289 
   3290 		/*
   3291 		   Nope... Go looking for an alternative...
   3292 		   Start high so we don't immediately use raid0 if that's
   3293 		   not taken.
   3294 		*/
   3295 
   3296 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3297 			if (raidPtrs[raidID]->valid == 0) {
   3298 				/* can use this one! */
   3299 				break;
   3300 			}
   3301 		}
   3302 	}
   3303 
   3304 	if (raidID < 0) {
   3305 		/* punt... */
   3306 		printf("Unable to auto configure this set!\n");
   3307 		printf("(Out of RAID devs!)\n");
   3308 		return(1);
   3309 	}
   3310 
   3311 #if DEBUG
   3312 	printf("Configuring raid%d:\n",raidID);
   3313 #endif
   3314 
   3315 	raidPtr = raidPtrs[raidID];
   3316 
   3317 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3318 	raidPtr->raidid = raidID;
   3319 	raidPtr->openings = RAIDOUTSTANDING;
   3320 
   3321 	/* 3. Build the configuration structure */
   3322 	rf_create_configuration(cset->ac, config, raidPtr);
   3323 
   3324 	/* 4. Do the configuration */
   3325 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3326 
   3327 	if (retcode == 0) {
   3328 
   3329 		raidinit(raidPtrs[raidID]);
   3330 
   3331 		rf_markalldirty(raidPtrs[raidID]);
   3332 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3333 		if (cset->ac->clabel->root_partition==1) {
   3334 			/* everything configured just fine.  Make a note
   3335 			   that this set is eligible to be root. */
   3336 			cset->rootable = 1;
   3337 			/* XXX do this here? */
   3338 			raidPtrs[raidID]->root_partition = 1;
   3339 		}
   3340 	}
   3341 
   3342 	/* 5. Cleanup */
   3343 	free(config, M_RAIDFRAME);
   3344 
   3345 	*unit = raidID;
   3346 	return(retcode);
   3347 }
   3348 
   3349 void
   3350 rf_disk_unbusy(desc)
   3351 	RF_RaidAccessDesc_t *desc;
   3352 {
   3353 	struct buf *bp;
   3354 
   3355 	bp = (struct buf *)desc->bp;
   3356 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3357 			    (bp->b_bcount - bp->b_resid));
   3358 }
   3359