rf_netbsdkintf.c revision 1.136 1 /* $NetBSD: rf_netbsdkintf.c,v 1.136 2002/09/22 03:56:08 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.136 2002/09/22 03:56:08 oster Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
/*
 * db1_printf((fmt, ...)) -- level-1 debug output.
 *
 * The argument is a complete, parenthesized printf argument list.  With
 * DEBUG defined the message is printed when rf_kdebug_level is greater
 * than zero; without DEBUG the macro expands to nothing.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves
 * like a single statement: it must be followed by a semicolon and can be
 * used as the body of an if that has an else branch.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
162
/*
 * Per-unit RAID descriptors, indexed by unit number.  Both the table and
 * the descriptors it points to are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Mutex initialized in raidattach() together with the two spare-table
 * queues below -- presumably it protects them; confirm against the users.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    dev_t dev, RF_SectorNum_t startSect,
    RF_SectorCount_t numSect, caddr_t buf,
    void (*cbFunc) (struct buf *), void *cbArg,
    int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

/* attach routine; the argument is the number of raid units requested */
void raidattach(int);

/* driver entry points referenced by the device switch tables below */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* block device switch entries for the raid driver */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/* character device switch entries for the raid driver */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, D_DISK
};
201
202 /*
203 * Pilfered from ccd.c
204 */
205
/*
 * Wrapper around the struct buf used for component I/O.  It carries a
 * pointer back to the original buf and to the disk-queue request it
 * belongs to.
 *
 * NOTE(review): rf_buf has to stay the first member -- presumably so a
 * struct buf pointer can be converted back to the enclosing raidbuf;
 * confirm against the code that uses it.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/*
 * component buffer pool; raidattach() initializes it with an item size of
 * sizeof(struct raidbuf)
 */
struct pool raidframe_cbufpool;
214
215 /* XXX Not sure if the following should be replacing the raidPtrs above,
216 or if it should be used in conjunction with that...
217 */
218
/*
 * Per-unit soft state.  One of these exists for every raid unit; the array
 * is allocated and zeroed in raidattach().
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* unit number encoded in a dev_t */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of raid units; set by raidattach() and used by the entry points
 * to range-check unit numbers.
 */
int     numraid = 0;
236
237 /*
238 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
239 * Be aware that large numbers can allow the driver to consume a lot of
240 * kernel memory, especially on writes, and in degraded mode reads.
241 *
242 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
243 * a single 64K write will typically require 64K for the old data,
244 * 64K for the old parity, and 64K for the new parity, for a total
245 * of 192K (if the parity buffer is not re-used immediately).
246 * Even if it is used immediately, that's still 128K, which when multiplied
247 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
248 *
249 * Now in degraded mode, for example, a 64K read on the above setup may
250 * require data reconstruction, which will require *all* of the 4 remaining
251 * disks to participate -- 4 * 32K/disk == 128K again.
252 */
253
/*
 * Default number of simultaneous I/Os allowed per RAID device, unless
 * RAIDOUTSTANDING is already defined when this file is compiled.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/*
 * Build the dev_t that names the raw partition (RAW_PART) of the unit
 * that `dev' belongs to, keeping dev's major number.
 */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
260
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of per-unit soft state, allocated in raidattach(). */
struct raid_softc *raid_softc;

/* disklabel helpers */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* per-unit lock taken by open, close and the shutdown ioctl */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);

/*
 * One struct device per raid unit, filled in by raidattach();
 * rf_buildroothack() points booted_device at one of these entries.
 */
struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr); */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

/* autoconfiguration support */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
				  Note that this is overridden by having
				  RAID_AUTOCONFIG as an option in the
				  kernel config file (raidattach() then
				  sets it to 1). */
301
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 /* Initialize the component buffer pool. */
333 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
334 0, 0, "raidpl", NULL);
335
336 rc = rf_mutex_init(&rf_sparet_wait_mutex);
337 if (rc) {
338 RF_PANIC();
339 }
340
341 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
342
343 for (i = 0; i < num; i++)
344 raidPtrs[i] = NULL;
345 rc = rf_BootRaidframe();
346 if (rc == 0)
347 printf("Kernelized RAIDframe activated\n");
348 else
349 panic("Serious error booting RAID!!\n");
350
351 /* put together some datastructures like the CCD device does.. This
352 * lets us lock the device and what-not when it gets opened. */
353
354 raid_softc = (struct raid_softc *)
355 malloc(num * sizeof(struct raid_softc),
356 M_RAIDFRAME, M_NOWAIT);
357 if (raid_softc == NULL) {
358 printf("WARNING: no memory for RAIDframe driver\n");
359 return;
360 }
361
362 memset(raid_softc, 0, num * sizeof(struct raid_softc));
363
364 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raidrootdev == NULL) {
367 panic("No memory for RAIDframe driver!!?!?!\n");
368 }
369
370 for (raidID = 0; raidID < num; raidID++) {
371 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
372
373 raidrootdev[raidID].dv_class = DV_DISK;
374 raidrootdev[raidID].dv_cfdata = NULL;
375 raidrootdev[raidID].dv_unit = raidID;
376 raidrootdev[raidID].dv_parent = NULL;
377 raidrootdev[raidID].dv_flags = 0;
378 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
379
380 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
381 (RF_Raid_t *));
382 if (raidPtrs[raidID] == NULL) {
383 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
384 numraid = raidID;
385 return;
386 }
387 }
388
389 #ifdef RAID_AUTOCONFIG
390 raidautoconfig = 1;
391 #endif
392
393 if (raidautoconfig) {
394 /* 1. locate all RAID components on the system */
395
396 #if DEBUG
397 printf("Searching for raid components...\n");
398 #endif
399 ac_list = rf_find_raid_components();
400
401 /* 2. sort them into their respective sets */
402
403 config_sets = rf_create_auto_sets(ac_list);
404
405 /* 3. evaluate each set and configure the valid ones
406 This gets done in rf_buildroothack() */
407
408 /* schedule the creation of the thread to do the
409 "/ on RAID" stuff */
410
411 kthread_create(rf_buildroothack,config_sets);
412
413 }
414
415 }
416
417 void
418 rf_buildroothack(arg)
419 void *arg;
420 {
421 RF_ConfigSet_t *config_sets = arg;
422 RF_ConfigSet_t *cset;
423 RF_ConfigSet_t *next_cset;
424 int retcode;
425 int raidID;
426 int rootID;
427 int num_root;
428
429 rootID = 0;
430 num_root = 0;
431 cset = config_sets;
432 while(cset != NULL ) {
433 next_cset = cset->next;
434 if (rf_have_enough_components(cset) &&
435 cset->ac->clabel->autoconfigure==1) {
436 retcode = rf_auto_config_set(cset,&raidID);
437 if (!retcode) {
438 if (cset->rootable) {
439 rootID = raidID;
440 num_root++;
441 }
442 } else {
443 /* The autoconfig didn't work :( */
444 #if DEBUG
445 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
446 #endif
447 rf_release_all_vps(cset);
448 }
449 } else {
450 /* we're not autoconfiguring this set...
451 release the associated resources */
452 rf_release_all_vps(cset);
453 }
454 /* cleanup */
455 rf_cleanup_config_set(cset);
456 cset = next_cset;
457 }
458
459 /* we found something bootable... */
460
461 if (num_root == 1) {
462 booted_device = &raidrootdev[rootID];
463 } else if (num_root > 1) {
464 /* we can't guess.. require the user to answer... */
465 boothowto |= RB_ASKNAME;
466 }
467 }
468
469
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
631 /* Last one... device is not unconfigured yet.
632 Device shutdown has taken care of setting the
633 clean bits if RAIDF_INITED is not set
634 mark things as clean... */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_update_component_labels(raidPtrs[unit],
639 RF_FINAL_COMPONENT_UPDATE);
640 if (doing_shutdown) {
641 /* last one, and we're going down, so
642 lights out for this RAID set too. */
643 error = rf_Shutdown(raidPtrs[unit]);
644
645 /* It's no longer initialized... */
646 rs->sc_flags &= ~RAIDF_INITED;
647
648 /* Detach the disk. */
649 disk_detach(&rs->sc_dkdev);
650 }
651 }
652
653 raidunlock(rs);
654 return (0);
655
656 }
657
658 void
659 raidstrategy(bp)
660 struct buf *bp;
661 {
662 int s;
663
664 unsigned int raidID = raidunit(bp->b_dev);
665 RF_Raid_t *raidPtr;
666 struct raid_softc *rs = &raid_softc[raidID];
667 struct disklabel *lp;
668 int wlabel;
669
670 if ((rs->sc_flags & RAIDF_INITED) ==0) {
671 bp->b_error = ENXIO;
672 bp->b_flags |= B_ERROR;
673 bp->b_resid = bp->b_bcount;
674 biodone(bp);
675 return;
676 }
677 if (raidID >= numraid || !raidPtrs[raidID]) {
678 bp->b_error = ENODEV;
679 bp->b_flags |= B_ERROR;
680 bp->b_resid = bp->b_bcount;
681 biodone(bp);
682 return;
683 }
684 raidPtr = raidPtrs[raidID];
685 if (!raidPtr->valid) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 bp->b_resid = bp->b_bcount;
689 biodone(bp);
690 return;
691 }
692 if (bp->b_bcount == 0) {
693 db1_printf(("b_bcount is zero..\n"));
694 biodone(bp);
695 return;
696 }
697 lp = rs->sc_dkdev.dk_label;
698
699 /*
700 * Do bounds checking and adjust transfer. If there's an
701 * error, the bounds check will flag that for us.
702 */
703
704 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
705 if (DISKPART(bp->b_dev) != RAW_PART)
706 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
707 db1_printf(("Bounds check failed!!:%d %d\n",
708 (int) bp->b_blkno, (int) wlabel));
709 biodone(bp);
710 return;
711 }
712 s = splbio();
713
714 bp->b_resid = 0;
715
716 /* stuff it onto our queue */
717 BUFQ_PUT(&rs->buf_queue, bp);
718
719 raidstart(raidPtrs[raidID]);
720
721 splx(s);
722 }
723 /* ARGSUSED */
724 int
725 raidread(dev, uio, flags)
726 dev_t dev;
727 struct uio *uio;
728 int flags;
729 {
730 int unit = raidunit(dev);
731 struct raid_softc *rs;
732 int part;
733
734 if (unit >= numraid)
735 return (ENXIO);
736 rs = &raid_softc[unit];
737
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 part = DISKPART(dev);
741
742 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
743
744 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
745
746 }
747 /* ARGSUSED */
748 int
749 raidwrite(dev, uio, flags)
750 dev_t dev;
751 struct uio *uio;
752 int flags;
753 {
754 int unit = raidunit(dev);
755 struct raid_softc *rs;
756
757 if (unit >= numraid)
758 return (ENXIO);
759 rs = &raid_softc[unit];
760
761 if ((rs->sc_flags & RAIDF_INITED) == 0)
762 return (ENXIO);
763 db1_printf(("raidwrite\n"));
764 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
765
766 }
767
768 int
769 raidioctl(dev, cmd, data, flag, p)
770 dev_t dev;
771 u_long cmd;
772 caddr_t data;
773 int flag;
774 struct proc *p;
775 {
776 int unit = raidunit(dev);
777 int error = 0;
778 int part, pmask;
779 struct raid_softc *rs;
780 RF_Config_t *k_cfg, *u_cfg;
781 RF_Raid_t *raidPtr;
782 RF_RaidDisk_t *diskPtr;
783 RF_AccTotals_t *totals;
784 RF_DeviceConfig_t *d_cfg, **ucfgp;
785 u_char *specific_buf;
786 int retcode = 0;
787 int row;
788 int column;
789 int raidid;
790 struct rf_recon_req *rrcopy, *rr;
791 RF_ComponentLabel_t *clabel;
792 RF_ComponentLabel_t ci_label;
793 RF_ComponentLabel_t **clabel_ptr;
794 RF_SingleComponent_t *sparePtr,*componentPtr;
795 RF_SingleComponent_t hot_spare;
796 RF_SingleComponent_t component;
797 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
798 int i, j, d;
799 #ifdef __HAVE_OLD_DISKLABEL
800 struct disklabel newlabel;
801 #endif
802
803 if (unit >= numraid)
804 return (ENXIO);
805 rs = &raid_softc[unit];
806 raidPtr = raidPtrs[unit];
807
808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
809 (int) DISKPART(dev), (int) unit, (int) cmd));
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case DIOCSDINFO:
814 case DIOCWDINFO:
815 #ifdef __HAVE_OLD_DISKLABEL
816 case ODIOCWDINFO:
817 case ODIOCSDINFO:
818 #endif
819 case DIOCWLABEL:
820 if ((flag & FWRITE) == 0)
821 return (EBADF);
822 }
823
824 /* Must be initialized for these... */
825 switch (cmd) {
826 case DIOCGDINFO:
827 case DIOCSDINFO:
828 case DIOCWDINFO:
829 #ifdef __HAVE_OLD_DISKLABEL
830 case ODIOCGDINFO:
831 case ODIOCWDINFO:
832 case ODIOCSDINFO:
833 case ODIOCGDEFLABEL:
834 #endif
835 case DIOCGPART:
836 case DIOCWLABEL:
837 case DIOCGDEFLABEL:
838 case RAIDFRAME_SHUTDOWN:
839 case RAIDFRAME_REWRITEPARITY:
840 case RAIDFRAME_GET_INFO:
841 case RAIDFRAME_RESET_ACCTOTALS:
842 case RAIDFRAME_GET_ACCTOTALS:
843 case RAIDFRAME_KEEP_ACCTOTALS:
844 case RAIDFRAME_GET_SIZE:
845 case RAIDFRAME_FAIL_DISK:
846 case RAIDFRAME_COPYBACK:
847 case RAIDFRAME_CHECK_RECON_STATUS:
848 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 case RAIDFRAME_SET_COMPONENT_LABEL:
851 case RAIDFRAME_ADD_HOT_SPARE:
852 case RAIDFRAME_REMOVE_HOT_SPARE:
853 case RAIDFRAME_INIT_LABELS:
854 case RAIDFRAME_REBUILD_IN_PLACE:
855 case RAIDFRAME_CHECK_PARITY:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS:
859 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
860 case RAIDFRAME_SET_AUTOCONFIG:
861 case RAIDFRAME_SET_ROOT:
862 case RAIDFRAME_DELETE_COMPONENT:
863 case RAIDFRAME_INCORPORATE_HOT_SPARE:
864 if ((rs->sc_flags & RAIDF_INITED) == 0)
865 return (ENXIO);
866 }
867
868 switch (cmd) {
869
870 /* configure the system */
871 case RAIDFRAME_CONFIGURE:
872
873 if (raidPtr->valid) {
874 /* There is a valid RAID set running on this unit! */
875 printf("raid%d: Device already configured!\n",unit);
876 return(EINVAL);
877 }
878
879 /* copy-in the configuration information */
880 /* data points to a pointer to the configuration structure */
881
882 u_cfg = *((RF_Config_t **) data);
883 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
884 if (k_cfg == NULL) {
885 return (ENOMEM);
886 }
887 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
888 sizeof(RF_Config_t));
889 if (retcode) {
890 RF_Free(k_cfg, sizeof(RF_Config_t));
891 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
892 retcode));
893 return (retcode);
894 }
895 /* allocate a buffer for the layout-specific data, and copy it
896 * in */
897 if (k_cfg->layoutSpecificSize) {
898 if (k_cfg->layoutSpecificSize > 10000) {
899 /* sanity check */
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 return (EINVAL);
902 }
903 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
904 (u_char *));
905 if (specific_buf == NULL) {
906 RF_Free(k_cfg, sizeof(RF_Config_t));
907 return (ENOMEM);
908 }
909 retcode = copyin(k_cfg->layoutSpecific,
910 (caddr_t) specific_buf,
911 k_cfg->layoutSpecificSize);
912 if (retcode) {
913 RF_Free(k_cfg, sizeof(RF_Config_t));
914 RF_Free(specific_buf,
915 k_cfg->layoutSpecificSize);
916 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
917 retcode));
918 return (retcode);
919 }
920 } else
921 specific_buf = NULL;
922 k_cfg->layoutSpecific = specific_buf;
923
924 /* should do some kind of sanity check on the configuration.
925 * Store the sum of all the bytes in the last byte? */
926
927 /* configure the system */
928
929 /*
930 * Clear the entire RAID descriptor, just to make sure
931 * there is no stale data left in the case of a
932 * reconfiguration
933 */
934 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
935 raidPtr->raidid = unit;
936
937 retcode = rf_Configure(raidPtr, k_cfg, NULL);
938
939 if (retcode == 0) {
940
941 /* allow this many simultaneous IO's to
942 this RAID device */
943 raidPtr->openings = RAIDOUTSTANDING;
944
945 raidinit(raidPtr);
946 rf_markalldirty(raidPtr);
947 }
948 /* free the buffers. No return code here. */
949 if (k_cfg->layoutSpecificSize) {
950 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
951 }
952 RF_Free(k_cfg, sizeof(RF_Config_t));
953
954 return (retcode);
955
956 /* shutdown the system */
957 case RAIDFRAME_SHUTDOWN:
958
959 if ((error = raidlock(rs)) != 0)
960 return (error);
961
962 /*
963 * If somebody has a partition mounted, we shouldn't
964 * shutdown.
965 */
966
967 part = DISKPART(dev);
968 pmask = (1 << part);
969 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
970 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
971 (rs->sc_dkdev.dk_copenmask & pmask))) {
972 raidunlock(rs);
973 return (EBUSY);
974 }
975
976 retcode = rf_Shutdown(raidPtr);
977
978 /* It's no longer initialized... */
979 rs->sc_flags &= ~RAIDF_INITED;
980
981 /* Detach the disk. */
982 disk_detach(&rs->sc_dkdev);
983
984 raidunlock(rs);
985
986 return (retcode);
987 case RAIDFRAME_GET_COMPONENT_LABEL:
988 clabel_ptr = (RF_ComponentLabel_t **) data;
989 /* need to read the component label for the disk indicated
990 by row,column in clabel */
991
992 /* For practice, let's get it directly from disk, rather
993 than from the in-core copy */
994 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
995 (RF_ComponentLabel_t *));
996 if (clabel == NULL)
997 return (ENOMEM);
998
999 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1000
1001 retcode = copyin( *clabel_ptr, clabel,
1002 sizeof(RF_ComponentLabel_t));
1003
1004 if (retcode) {
1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1006 return(retcode);
1007 }
1008
1009 row = clabel->row;
1010 column = clabel->column;
1011
1012 if ((row < 0) || (row >= raidPtr->numRow) ||
1013 (column < 0) || (column >= raidPtr->numCol +
1014 raidPtr->numSpare)) {
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return(EINVAL);
1017 }
1018
1019 raidread_component_label(raidPtr->Disks[row][column].dev,
1020 raidPtr->raid_cinfo[row][column].ci_vp,
1021 clabel );
1022
1023 retcode = copyout((caddr_t) clabel,
1024 (caddr_t) *clabel_ptr,
1025 sizeof(RF_ComponentLabel_t));
1026 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1027 return (retcode);
1028
1029 case RAIDFRAME_SET_COMPONENT_LABEL:
1030 clabel = (RF_ComponentLabel_t *) data;
1031
1032 /* XXX check the label for valid stuff... */
1033 /* Note that some things *should not* get modified --
1034 the user should be re-initing the labels instead of
1035 trying to patch things.
1036 */
1037
1038 raidid = raidPtr->raidid;
1039 printf("raid%d: Got component label:\n", raidid);
1040 printf("raid%d: Version: %d\n", raidid, clabel->version);
1041 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1042 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1043 printf("raid%d: Row: %d\n", raidid, clabel->row);
1044 printf("raid%d: Column: %d\n", raidid, clabel->column);
1045 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1046 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1047 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1048 printf("raid%d: Status: %d\n", raidid, clabel->status);
1049
1050 row = clabel->row;
1051 column = clabel->column;
1052
1053 if ((row < 0) || (row >= raidPtr->numRow) ||
1054 (column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[row][column].dev,
1065 raidPtr->raid_cinfo[row][column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 raid_init_component_label(raidPtr, &ci_label);
1082 ci_label.serial_number = clabel->serial_number;
1083
1084 for(row=0;row<raidPtr->numRow;row++) {
1085 ci_label.row = row;
1086 for(column=0;column<raidPtr->numCol;column++) {
1087 diskPtr = &raidPtr->Disks[row][column];
1088 if (!RF_DEAD_DISK(diskPtr->status)) {
1089 ci_label.partitionSize = diskPtr->partitionSize;
1090 ci_label.column = column;
1091 raidwrite_component_label(
1092 raidPtr->Disks[row][column].dev,
1093 raidPtr->raid_cinfo[row][column].ci_vp,
1094 &ci_label );
1095 }
1096 }
1097 }
1098
1099 return (retcode);
1100 case RAIDFRAME_SET_AUTOCONFIG:
1101 d = rf_set_autoconfig(raidPtr, *(int *) data);
1102 printf("raid%d: New autoconfig value is: %d\n",
1103 raidPtr->raidid, d);
1104 *(int *) data = d;
1105 return (retcode);
1106
1107 case RAIDFRAME_SET_ROOT:
1108 d = rf_set_rootpartition(raidPtr, *(int *) data);
1109 printf("raid%d: New rootpartition value is: %d\n",
1110 raidPtr->raidid, d);
1111 *(int *) data = d;
1112 return (retcode);
1113
1114 /* initialize all parity */
1115 case RAIDFRAME_REWRITEPARITY:
1116
1117 if (raidPtr->Layout.map->faultsTolerated == 0) {
1118 /* Parity for RAID 0 is trivially correct */
1119 raidPtr->parity_good = RF_RAID_CLEAN;
1120 return(0);
1121 }
1122
1123 if (raidPtr->parity_rewrite_in_progress == 1) {
1124 /* Re-write is already in progress! */
1125 return(EINVAL);
1126 }
1127
1128 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1129 rf_RewriteParityThread,
1130 raidPtr,"raid_parity");
1131 return (retcode);
1132
1133
1134 case RAIDFRAME_ADD_HOT_SPARE:
1135 sparePtr = (RF_SingleComponent_t *) data;
1136 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1137 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1138 return(retcode);
1139
1140 case RAIDFRAME_REMOVE_HOT_SPARE:
1141 return(retcode);
1142
1143 case RAIDFRAME_DELETE_COMPONENT:
1144 componentPtr = (RF_SingleComponent_t *)data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 retcode = rf_delete_component(raidPtr, &component);
1148 return(retcode);
1149
1150 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1151 componentPtr = (RF_SingleComponent_t *)data;
1152 memcpy( &component, componentPtr,
1153 sizeof(RF_SingleComponent_t));
1154 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1155 return(retcode);
1156
1157 case RAIDFRAME_REBUILD_IN_PLACE:
1158
1159 if (raidPtr->Layout.map->faultsTolerated == 0) {
1160 /* Can't do this on a RAID 0!! */
1161 return(EINVAL);
1162 }
1163
1164 if (raidPtr->recon_in_progress == 1) {
1165 /* a reconstruct is already in progress! */
1166 return(EINVAL);
1167 }
1168
1169 componentPtr = (RF_SingleComponent_t *) data;
1170 memcpy( &component, componentPtr,
1171 sizeof(RF_SingleComponent_t));
1172 row = component.row;
1173 column = component.column;
1174 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1175 row, column);
1176 if ((row < 0) || (row >= raidPtr->numRow) ||
1177 (column < 0) || (column >= raidPtr->numCol)) {
1178 return(EINVAL);
1179 }
1180
1181 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1182 if (rrcopy == NULL)
1183 return(ENOMEM);
1184
1185 rrcopy->raidPtr = (void *) raidPtr;
1186 rrcopy->row = row;
1187 rrcopy->col = column;
1188
1189 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1190 rf_ReconstructInPlaceThread,
1191 rrcopy,"raid_reconip");
1192 return(retcode);
1193
1194 case RAIDFRAME_GET_INFO:
1195 if (!raidPtr->valid)
1196 return (ENODEV);
1197 ucfgp = (RF_DeviceConfig_t **) data;
1198 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1199 (RF_DeviceConfig_t *));
1200 if (d_cfg == NULL)
1201 return (ENOMEM);
1202 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1203 d_cfg->rows = raidPtr->numRow;
1204 d_cfg->cols = raidPtr->numCol;
1205 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1206 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1207 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1208 return (ENOMEM);
1209 }
1210 d_cfg->nspares = raidPtr->numSpare;
1211 if (d_cfg->nspares >= RF_MAX_DISKS) {
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213 return (ENOMEM);
1214 }
1215 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1216 d = 0;
1217 for (i = 0; i < d_cfg->rows; i++) {
1218 for (j = 0; j < d_cfg->cols; j++) {
1219 d_cfg->devs[d] = raidPtr->Disks[i][j];
1220 d++;
1221 }
1222 }
1223 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1224 d_cfg->spares[i] = raidPtr->Disks[0][j];
1225 }
1226 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1227 sizeof(RF_DeviceConfig_t));
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229
1230 return (retcode);
1231
1232 case RAIDFRAME_CHECK_PARITY:
1233 *(int *) data = raidPtr->parity_good;
1234 return (0);
1235
1236 case RAIDFRAME_RESET_ACCTOTALS:
1237 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1238 return (0);
1239
1240 case RAIDFRAME_GET_ACCTOTALS:
1241 totals = (RF_AccTotals_t *) data;
1242 *totals = raidPtr->acc_totals;
1243 return (0);
1244
1245 case RAIDFRAME_KEEP_ACCTOTALS:
1246 raidPtr->keep_acc_totals = *(int *)data;
1247 return (0);
1248
1249 case RAIDFRAME_GET_SIZE:
1250 *(int *) data = raidPtr->totalSectors;
1251 return (0);
1252
1253 /* fail a disk & optionally start reconstruction */
1254 case RAIDFRAME_FAIL_DISK:
1255
1256 if (raidPtr->Layout.map->faultsTolerated == 0) {
1257 /* Can't do this on a RAID 0!! */
1258 return(EINVAL);
1259 }
1260
1261 rr = (struct rf_recon_req *) data;
1262
1263 if (rr->row < 0 || rr->row >= raidPtr->numRow
1264 || rr->col < 0 || rr->col >= raidPtr->numCol)
1265 return (EINVAL);
1266
1267 printf("raid%d: Failing the disk: row: %d col: %d\n",
1268 unit, rr->row, rr->col);
1269
1270 /* make a copy of the recon request so that we don't rely on
1271 * the user's buffer */
1272 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1273 if (rrcopy == NULL)
1274 return(ENOMEM);
1275 memcpy(rrcopy, rr, sizeof(*rr));
1276 rrcopy->raidPtr = (void *) raidPtr;
1277
1278 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1279 rf_ReconThread,
1280 rrcopy,"raid_recon");
1281 return (0);
1282
1283 /* invoke a copyback operation after recon on whatever disk
1284 * needs it, if any */
1285 case RAIDFRAME_COPYBACK:
1286
1287 if (raidPtr->Layout.map->faultsTolerated == 0) {
1288 /* This makes no sense on a RAID 0!! */
1289 return(EINVAL);
1290 }
1291
1292 if (raidPtr->copyback_in_progress == 1) {
1293 /* Copyback is already in progress! */
1294 return(EINVAL);
1295 }
1296
1297 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1298 rf_CopybackThread,
1299 raidPtr,"raid_copyback");
1300 return (retcode);
1301
1302 /* return the percentage completion of reconstruction */
1303 case RAIDFRAME_CHECK_RECON_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0, so tell the
1306 user it's done. */
1307 *(int *) data = 100;
1308 return(0);
1309 }
1310 row = 0; /* XXX we only consider a single row... */
1311 if (raidPtr->status[row] != rf_rs_reconstructing)
1312 *(int *) data = 100;
1313 else
1314 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1315 return (0);
1316 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1317 progressInfoPtr = (RF_ProgressInfo_t **) data;
1318 row = 0; /* XXX we only consider a single row... */
1319 if (raidPtr->status[row] != rf_rs_reconstructing) {
1320 progressInfo.remaining = 0;
1321 progressInfo.completed = 100;
1322 progressInfo.total = 100;
1323 } else {
1324 progressInfo.total =
1325 raidPtr->reconControl[row]->numRUsTotal;
1326 progressInfo.completed =
1327 raidPtr->reconControl[row]->numRUsComplete;
1328 progressInfo.remaining = progressInfo.total -
1329 progressInfo.completed;
1330 }
1331 retcode = copyout((caddr_t) &progressInfo,
1332 (caddr_t) *progressInfoPtr,
1333 sizeof(RF_ProgressInfo_t));
1334 return (retcode);
1335
1336 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1337 if (raidPtr->Layout.map->faultsTolerated == 0) {
1338 /* This makes no sense on a RAID 0, so tell the
1339 user it's done. */
1340 *(int *) data = 100;
1341 return(0);
1342 }
1343 if (raidPtr->parity_rewrite_in_progress == 1) {
1344 *(int *) data = 100 *
1345 raidPtr->parity_rewrite_stripes_done /
1346 raidPtr->Layout.numStripe;
1347 } else {
1348 *(int *) data = 100;
1349 }
1350 return (0);
1351
1352 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1353 progressInfoPtr = (RF_ProgressInfo_t **) data;
1354 if (raidPtr->parity_rewrite_in_progress == 1) {
1355 progressInfo.total = raidPtr->Layout.numStripe;
1356 progressInfo.completed =
1357 raidPtr->parity_rewrite_stripes_done;
1358 progressInfo.remaining = progressInfo.total -
1359 progressInfo.completed;
1360 } else {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 }
1365 retcode = copyout((caddr_t) &progressInfo,
1366 (caddr_t) *progressInfoPtr,
1367 sizeof(RF_ProgressInfo_t));
1368 return (retcode);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1371 if (raidPtr->Layout.map->faultsTolerated == 0) {
1372 /* This makes no sense on a RAID 0 */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->copyback_in_progress == 1) {
1377 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1378 raidPtr->Layout.numStripe;
1379 } else {
1380 *(int *) data = 100;
1381 }
1382 return (0);
1383
1384 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1385 progressInfoPtr = (RF_ProgressInfo_t **) data;
1386 if (raidPtr->copyback_in_progress == 1) {
1387 progressInfo.total = raidPtr->Layout.numStripe;
1388 progressInfo.completed =
1389 raidPtr->copyback_stripes_done;
1390 progressInfo.remaining = progressInfo.total -
1391 progressInfo.completed;
1392 } else {
1393 progressInfo.remaining = 0;
1394 progressInfo.completed = 100;
1395 progressInfo.total = 100;
1396 }
1397 retcode = copyout((caddr_t) &progressInfo,
1398 (caddr_t) *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 /* the sparetable daemon calls this to wait for the kernel to
1403 * need a spare table. this ioctl does not return until a
1404 * spare table is needed. XXX -- calling mpsleep here in the
1405 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1406 * -- I should either compute the spare table in the kernel,
1407 * or have a different -- XXX XXX -- interface (a different
1408 * character device) for delivering the table -- XXX */
1409 #if 0
1410 case RAIDFRAME_SPARET_WAIT:
1411 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1412 while (!rf_sparet_wait_queue)
1413 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1414 waitreq = rf_sparet_wait_queue;
1415 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1416 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1417
1418 /* structure assignment */
1419 *((RF_SparetWait_t *) data) = *waitreq;
1420
1421 RF_Free(waitreq, sizeof(*waitreq));
1422 return (0);
1423
1424 /* wakes up a process waiting on SPARET_WAIT and puts an error
1425 * code in it that will cause the dameon to exit */
1426 case RAIDFRAME_ABORT_SPARET_WAIT:
1427 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1428 waitreq->fcol = -1;
1429 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1430 waitreq->next = rf_sparet_wait_queue;
1431 rf_sparet_wait_queue = waitreq;
1432 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1433 wakeup(&rf_sparet_wait_queue);
1434 return (0);
1435
1436 /* used by the spare table daemon to deliver a spare table
1437 * into the kernel */
1438 case RAIDFRAME_SEND_SPARET:
1439
1440 /* install the spare table */
1441 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1442
1443 /* respond to the requestor. the return status of the spare
1444 * table installation is passed in the "fcol" field */
1445 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1446 waitreq->fcol = retcode;
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 waitreq->next = rf_sparet_resp_queue;
1449 rf_sparet_resp_queue = waitreq;
1450 wakeup(&rf_sparet_resp_queue);
1451 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1452
1453 return (retcode);
1454 #endif
1455
1456 default:
1457 break; /* fall through to the os-specific code below */
1458
1459 }
1460
1461 if (!raidPtr->valid)
1462 return (EINVAL);
1463
1464 /*
1465 * Add support for "regular" device ioctls here.
1466 */
1467
1468 switch (cmd) {
1469 case DIOCGDINFO:
1470 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1471 break;
1472 #ifdef __HAVE_OLD_DISKLABEL
1473 case ODIOCGDINFO:
1474 newlabel = *(rs->sc_dkdev.dk_label);
1475 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1476 return ENOTTY;
1477 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1478 break;
1479 #endif
1480
1481 case DIOCGPART:
1482 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1483 ((struct partinfo *) data)->part =
1484 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1485 break;
1486
1487 case DIOCWDINFO:
1488 case DIOCSDINFO:
1489 #ifdef __HAVE_OLD_DISKLABEL
1490 case ODIOCWDINFO:
1491 case ODIOCSDINFO:
1492 #endif
1493 {
1494 struct disklabel *lp;
1495 #ifdef __HAVE_OLD_DISKLABEL
1496 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1497 memset(&newlabel, 0, sizeof newlabel);
1498 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1499 lp = &newlabel;
1500 } else
1501 #endif
1502 lp = (struct disklabel *)data;
1503
1504 if ((error = raidlock(rs)) != 0)
1505 return (error);
1506
1507 rs->sc_flags |= RAIDF_LABELLING;
1508
1509 error = setdisklabel(rs->sc_dkdev.dk_label,
1510 lp, 0, rs->sc_dkdev.dk_cpulabel);
1511 if (error == 0) {
1512 if (cmd == DIOCWDINFO
1513 #ifdef __HAVE_OLD_DISKLABEL
1514 || cmd == ODIOCWDINFO
1515 #endif
1516 )
1517 error = writedisklabel(RAIDLABELDEV(dev),
1518 raidstrategy, rs->sc_dkdev.dk_label,
1519 rs->sc_dkdev.dk_cpulabel);
1520 }
1521 rs->sc_flags &= ~RAIDF_LABELLING;
1522
1523 raidunlock(rs);
1524
1525 if (error)
1526 return (error);
1527 break;
1528 }
1529
1530 case DIOCWLABEL:
1531 if (*(int *) data != 0)
1532 rs->sc_flags |= RAIDF_WLABEL;
1533 else
1534 rs->sc_flags &= ~RAIDF_WLABEL;
1535 break;
1536
1537 case DIOCGDEFLABEL:
1538 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1539 break;
1540
1541 #ifdef __HAVE_OLD_DISKLABEL
1542 case ODIOCGDEFLABEL:
1543 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1544 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1545 return ENOTTY;
1546 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1547 break;
1548 #endif
1549
1550 default:
1551 retcode = ENOTTY;
1552 }
1553 return (retcode);
1554
1555 }
1556
1557
1558 /* raidinit -- complete the rest of the initialization for the
1559 RAIDframe device. */
1560
1561
1562 static void
1563 raidinit(raidPtr)
1564 RF_Raid_t *raidPtr;
1565 {
1566 struct raid_softc *rs;
1567 int unit;
1568
1569 unit = raidPtr->raidid;
1570
1571 rs = &raid_softc[unit];
1572
1573 /* XXX should check return code first... */
1574 rs->sc_flags |= RAIDF_INITED;
1575
1576 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1577
1578 rs->sc_dkdev.dk_name = rs->sc_xname;
1579
1580 /* disk_attach actually creates space for the CPU disklabel, among
1581 * other things, so it's critical to call this *BEFORE* we try putzing
1582 * with disklabels. */
1583
1584 disk_attach(&rs->sc_dkdev);
1585
1586 /* XXX There may be a weird interaction here between this, and
1587 * protectedSectors, as used in RAIDframe. */
1588
1589 rs->sc_size = raidPtr->totalSectors;
1590
1591 }
1592
1593 /* wake up the daemon & tell it to get us a spare table
1594 * XXX
1595 * the entries in the queues should be tagged with the raidPtr
1596 * so that in the extremely rare case that two recons happen at once,
1597 * we know for which device were requesting a spare table
1598 * XXX
1599 *
1600 * XXX This code is not currently used. GO
1601 */
1602 int
1603 rf_GetSpareTableFromDaemon(req)
1604 RF_SparetWait_t *req;
1605 {
1606 int retcode;
1607
1608 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1609 req->next = rf_sparet_wait_queue;
1610 rf_sparet_wait_queue = req;
1611 wakeup(&rf_sparet_wait_queue);
1612
1613 /* mpsleep unlocks the mutex */
1614 while (!rf_sparet_resp_queue) {
1615 tsleep(&rf_sparet_resp_queue, PRIBIO,
1616 "raidframe getsparetable", 0);
1617 }
1618 req = rf_sparet_resp_queue;
1619 rf_sparet_resp_queue = req->next;
1620 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1621
1622 retcode = req->fcol;
1623 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1624 * alloc'd */
1625 return (retcode);
1626 }
1627
1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1629 * bp & passes it down.
1630 * any calls originating in the kernel must use non-blocking I/O
1631 * do some extra sanity checking to return "appropriate" error values for
1632 * certain conditions (to make some standard utilities work)
1633 *
1634 * Formerly known as: rf_DoAccessKernel
1635 */
1636 void
1637 raidstart(raidPtr)
1638 RF_Raid_t *raidPtr;
1639 {
1640 RF_SectorCount_t num_blocks, pb, sum;
1641 RF_RaidAddr_t raid_addr;
1642 int retcode;
1643 struct partition *pp;
1644 daddr_t blocknum;
1645 int unit;
1646 struct raid_softc *rs;
1647 int do_async;
1648 struct buf *bp;
1649
1650 unit = raidPtr->raidid;
1651 rs = &raid_softc[unit];
1652
1653 /* quick check to see if anything has died recently */
1654 RF_LOCK_MUTEX(raidPtr->mutex);
1655 if (raidPtr->numNewFailures > 0) {
1656 rf_update_component_labels(raidPtr,
1657 RF_NORMAL_COMPONENT_UPDATE);
1658 raidPtr->numNewFailures--;
1659 }
1660
1661 /* Check to see if we're at the limit... */
1662 while (raidPtr->openings > 0) {
1663 RF_UNLOCK_MUTEX(raidPtr->mutex);
1664
1665 /* get the next item, if any, from the queue */
1666 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1667 /* nothing more to do */
1668 return;
1669 }
1670
1671 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1672 * partition.. Need to make it absolute to the underlying
1673 * device.. */
1674
1675 blocknum = bp->b_blkno;
1676 if (DISKPART(bp->b_dev) != RAW_PART) {
1677 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1678 blocknum += pp->p_offset;
1679 }
1680
1681 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1682 (int) blocknum));
1683
1684 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1685 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1686
1687 /* *THIS* is where we adjust what block we're going to...
1688 * but DO NOT TOUCH bp->b_blkno!!! */
1689 raid_addr = blocknum;
1690
1691 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1692 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1693 sum = raid_addr + num_blocks + pb;
1694 if (1 || rf_debugKernelAccess) {
1695 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1696 (int) raid_addr, (int) sum, (int) num_blocks,
1697 (int) pb, (int) bp->b_resid));
1698 }
1699 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1700 || (sum < num_blocks) || (sum < pb)) {
1701 bp->b_error = ENOSPC;
1702 bp->b_flags |= B_ERROR;
1703 bp->b_resid = bp->b_bcount;
1704 biodone(bp);
1705 RF_LOCK_MUTEX(raidPtr->mutex);
1706 continue;
1707 }
1708 /*
1709 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1710 */
1711
1712 if (bp->b_bcount & raidPtr->sectorMask) {
1713 bp->b_error = EINVAL;
1714 bp->b_flags |= B_ERROR;
1715 bp->b_resid = bp->b_bcount;
1716 biodone(bp);
1717 RF_LOCK_MUTEX(raidPtr->mutex);
1718 continue;
1719
1720 }
1721 db1_printf(("Calling DoAccess..\n"));
1722
1723
1724 RF_LOCK_MUTEX(raidPtr->mutex);
1725 raidPtr->openings--;
1726 RF_UNLOCK_MUTEX(raidPtr->mutex);
1727
1728 /*
1729 * Everything is async.
1730 */
1731 do_async = 1;
1732
1733 disk_busy(&rs->sc_dkdev);
1734
1735 /* XXX we're still at splbio() here... do we *really*
1736 need to be? */
1737
1738 /* don't ever condition on bp->b_flags & B_WRITE.
1739 * always condition on B_READ instead */
1740
1741 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1742 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1743 do_async, raid_addr, num_blocks,
1744 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1745
1746 RF_LOCK_MUTEX(raidPtr->mutex);
1747 }
1748 RF_UNLOCK_MUTEX(raidPtr->mutex);
1749 }
1750
1751
1752
1753
1754 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1755
1756 int
1757 rf_DispatchKernelIO(queue, req)
1758 RF_DiskQueue_t *queue;
1759 RF_DiskQueueData_t *req;
1760 {
1761 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1762 struct buf *bp;
1763 struct raidbuf *raidbp = NULL;
1764
1765 req->queue = queue;
1766
1767 #if DIAGNOSTIC
1768 if (queue->raidPtr->raidid >= numraid) {
1769 printf("Invalid unit number: %d %d\n", unit, numraid);
1770 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1771 }
1772 #endif
1773
1774 bp = req->bp;
1775 #if 1
1776 /* XXX when there is a physical disk failure, someone is passing us a
1777 * buffer that contains old stuff!! Attempt to deal with this problem
1778 * without taking a performance hit... (not sure where the real bug
1779 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1780
1781 if (bp->b_flags & B_ERROR) {
1782 bp->b_flags &= ~B_ERROR;
1783 }
1784 if (bp->b_error != 0) {
1785 bp->b_error = 0;
1786 }
1787 #endif
1788 raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1789
1790 /*
1791 * context for raidiodone
1792 */
1793 raidbp->rf_obp = bp;
1794 raidbp->req = req;
1795
1796 LIST_INIT(&raidbp->rf_buf.b_dep);
1797
1798 switch (req->type) {
1799 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1800 /* XXX need to do something extra here.. */
1801 /* I'm leaving this in, as I've never actually seen it used,
1802 * and I'd like folks to report it... GO */
1803 printf(("WAKEUP CALLED\n"));
1804 queue->numOutstanding++;
1805
1806 /* XXX need to glue the original buffer into this?? */
1807
1808 KernelWakeupFunc(&raidbp->rf_buf);
1809 break;
1810
1811 case RF_IO_TYPE_READ:
1812 case RF_IO_TYPE_WRITE:
1813
1814 if (req->tracerec) {
1815 RF_ETIMER_START(req->tracerec->timer);
1816 }
1817 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1818 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1819 req->sectorOffset, req->numSector,
1820 req->buf, KernelWakeupFunc, (void *) req,
1821 queue->raidPtr->logBytesPerSector, req->b_proc);
1822
1823 if (rf_debugKernelAccess) {
1824 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1825 (long) bp->b_blkno));
1826 }
1827 queue->numOutstanding++;
1828 queue->last_deq_sector = req->sectorOffset;
1829 /* acc wouldn't have been let in if there were any pending
1830 * reqs at any other priority */
1831 queue->curPriority = req->priority;
1832
1833 db1_printf(("Going for %c to unit %d row %d col %d\n",
1834 req->type, queue->raidPtr->raidid,
1835 queue->row, queue->col));
1836 db1_printf(("sector %d count %d (%d bytes) %d\n",
1837 (int) req->sectorOffset, (int) req->numSector,
1838 (int) (req->numSector <<
1839 queue->raidPtr->logBytesPerSector),
1840 (int) queue->raidPtr->logBytesPerSector));
1841 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1842 raidbp->rf_buf.b_vp->v_numoutput++;
1843 }
1844 VOP_STRATEGY(&raidbp->rf_buf);
1845
1846 break;
1847
1848 default:
1849 panic("bad req->type in rf_DispatchKernelIO");
1850 }
1851 db1_printf(("Exiting from DispatchKernelIO\n"));
1852
1853 return (0);
1854 }
1855 /* this is the callback function associated with a I/O invoked from
1856 kernel code.
1857 */
1858 static void
1859 KernelWakeupFunc(vbp)
1860 struct buf *vbp;
1861 {
1862 RF_DiskQueueData_t *req = NULL;
1863 RF_DiskQueue_t *queue;
1864 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1865 struct buf *bp;
1866 int s;
1867
1868 s = splbio();
1869 db1_printf(("recovering the request queue:\n"));
1870 req = raidbp->req;
1871
1872 bp = raidbp->rf_obp;
1873
1874 queue = (RF_DiskQueue_t *) req->queue;
1875
1876 if (raidbp->rf_buf.b_flags & B_ERROR) {
1877 bp->b_flags |= B_ERROR;
1878 bp->b_error = raidbp->rf_buf.b_error ?
1879 raidbp->rf_buf.b_error : EIO;
1880 }
1881
1882 /* XXX methinks this could be wrong... */
1883 #if 1
1884 bp->b_resid = raidbp->rf_buf.b_resid;
1885 #endif
1886
1887 if (req->tracerec) {
1888 RF_ETIMER_STOP(req->tracerec->timer);
1889 RF_ETIMER_EVAL(req->tracerec->timer);
1890 RF_LOCK_MUTEX(rf_tracing_mutex);
1891 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1892 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1893 req->tracerec->num_phys_ios++;
1894 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1895 }
1896 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1897
1898 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1899 * ballistic, and mark the component as hosed... */
1900
1901 if (bp->b_flags & B_ERROR) {
1902 /* Mark the disk as dead */
1903 /* but only mark it once... */
1904 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1905 rf_ds_optimal) {
1906 printf("raid%d: IO Error. Marking %s as failed.\n",
1907 queue->raidPtr->raidid,
1908 queue->raidPtr->Disks[queue->row][queue->col].devname);
1909 queue->raidPtr->Disks[queue->row][queue->col].status =
1910 rf_ds_failed;
1911 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1912 queue->raidPtr->numFailures++;
1913 queue->raidPtr->numNewFailures++;
1914 } else { /* Disk is already dead... */
1915 /* printf("Disk already marked as dead!\n"); */
1916 }
1917
1918 }
1919
1920 pool_put(&raidframe_cbufpool, raidbp);
1921
1922 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1923 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1924
1925 splx(s);
1926 }
1927
1928
1929
1930 /*
1931 * initialize a buf structure for doing an I/O in the kernel.
1932 */
1933 static void
1934 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1935 logBytesPerSector, b_proc)
1936 struct buf *bp;
1937 struct vnode *b_vp;
1938 unsigned rw_flag;
1939 dev_t dev;
1940 RF_SectorNum_t startSect;
1941 RF_SectorCount_t numSect;
1942 caddr_t buf;
1943 void (*cbFunc) (struct buf *);
1944 void *cbArg;
1945 int logBytesPerSector;
1946 struct proc *b_proc;
1947 {
1948 /* bp->b_flags = B_PHYS | rw_flag; */
1949 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1950 bp->b_bcount = numSect << logBytesPerSector;
1951 bp->b_bufsize = bp->b_bcount;
1952 bp->b_error = 0;
1953 bp->b_dev = dev;
1954 bp->b_data = buf;
1955 bp->b_blkno = startSect;
1956 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1957 if (bp->b_bcount == 0) {
1958 panic("bp->b_bcount is zero in InitBP!!\n");
1959 }
1960 bp->b_proc = b_proc;
1961 bp->b_iodone = cbFunc;
1962 bp->b_vp = b_vp;
1963
1964 }
1965
1966 static void
1967 raidgetdefaultlabel(raidPtr, rs, lp)
1968 RF_Raid_t *raidPtr;
1969 struct raid_softc *rs;
1970 struct disklabel *lp;
1971 {
1972 db1_printf(("Building a default label...\n"));
1973 memset(lp, 0, sizeof(*lp));
1974
1975 /* fabricate a label... */
1976 lp->d_secperunit = raidPtr->totalSectors;
1977 lp->d_secsize = raidPtr->bytesPerSector;
1978 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1979 lp->d_ntracks = 4 * raidPtr->numCol;
1980 lp->d_ncylinders = raidPtr->totalSectors /
1981 (lp->d_nsectors * lp->d_ntracks);
1982 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1983
1984 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1985 lp->d_type = DTYPE_RAID;
1986 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1987 lp->d_rpm = 3600;
1988 lp->d_interleave = 1;
1989 lp->d_flags = 0;
1990
1991 lp->d_partitions[RAW_PART].p_offset = 0;
1992 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1993 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1994 lp->d_npartitions = RAW_PART + 1;
1995
1996 lp->d_magic = DISKMAGIC;
1997 lp->d_magic2 = DISKMAGIC;
1998 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1999
2000 }
2001 /*
2002 * Read the disklabel from the raid device. If one is not present, fake one
2003 * up.
2004 */
2005 static void
2006 raidgetdisklabel(dev)
2007 dev_t dev;
2008 {
2009 int unit = raidunit(dev);
2010 struct raid_softc *rs = &raid_softc[unit];
2011 char *errstring;
2012 struct disklabel *lp = rs->sc_dkdev.dk_label;
2013 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2014 RF_Raid_t *raidPtr;
2015
2016 db1_printf(("Getting the disklabel...\n"));
2017
2018 memset(clp, 0, sizeof(*clp));
2019
2020 raidPtr = raidPtrs[unit];
2021
2022 raidgetdefaultlabel(raidPtr, rs, lp);
2023
2024 /*
2025 * Call the generic disklabel extraction routine.
2026 */
2027 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2028 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2029 if (errstring)
2030 raidmakedisklabel(rs);
2031 else {
2032 int i;
2033 struct partition *pp;
2034
2035 /*
2036 * Sanity check whether the found disklabel is valid.
2037 *
2038 * This is necessary since total size of the raid device
2039 * may vary when an interleave is changed even though exactly
2040 * same componets are used, and old disklabel may used
2041 * if that is found.
2042 */
2043 if (lp->d_secperunit != rs->sc_size)
2044 printf("raid%d: WARNING: %s: "
2045 "total sector size in disklabel (%d) != "
2046 "the size of raid (%ld)\n", unit, rs->sc_xname,
2047 lp->d_secperunit, (long) rs->sc_size);
2048 for (i = 0; i < lp->d_npartitions; i++) {
2049 pp = &lp->d_partitions[i];
2050 if (pp->p_offset + pp->p_size > rs->sc_size)
2051 printf("raid%d: WARNING: %s: end of partition `%c' "
2052 "exceeds the size of raid (%ld)\n",
2053 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2054 }
2055 }
2056
2057 }
2058 /*
2059 * Take care of things one might want to take care of in the event
2060 * that a disklabel isn't present.
2061 */
2062 static void
2063 raidmakedisklabel(rs)
2064 struct raid_softc *rs;
2065 {
2066 struct disklabel *lp = rs->sc_dkdev.dk_label;
2067 db1_printf(("Making a label..\n"));
2068
2069 /*
2070 * For historical reasons, if there's no disklabel present
2071 * the raw partition must be marked FS_BSDFFS.
2072 */
2073
2074 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2075
2076 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2077
2078 lp->d_checksum = dkcksum(lp);
2079 }
2080 /*
2081 * Lookup the provided name in the filesystem. If the file exists,
2082 * is a valid block device, and isn't being used by anyone else,
2083 * set *vpp to the file's vnode.
2084 * You'll find the original of this in ccd.c
2085 */
2086 int
2087 raidlookup(path, p, vpp)
2088 char *path;
2089 struct proc *p;
2090 struct vnode **vpp; /* result */
2091 {
2092 struct nameidata nd;
2093 struct vnode *vp;
2094 struct vattr va;
2095 int error;
2096
2097 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2098 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2099 #if 0
2100 printf("RAIDframe: vn_open returned %d\n", error);
2101 #endif
2102 return (error);
2103 }
2104 vp = nd.ni_vp;
2105 if (vp->v_usecount > 1) {
2106 VOP_UNLOCK(vp, 0);
2107 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2108 return (EBUSY);
2109 }
2110 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2111 VOP_UNLOCK(vp, 0);
2112 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2113 return (error);
2114 }
2115 /* XXX: eventually we should handle VREG, too. */
2116 if (va.va_type != VBLK) {
2117 VOP_UNLOCK(vp, 0);
2118 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2119 return (ENOTBLK);
2120 }
2121 VOP_UNLOCK(vp, 0);
2122 *vpp = vp;
2123 return (0);
2124 }
2125 /*
2126 * Wait interruptibly for an exclusive lock.
2127 *
2128 * XXX
2129 * Several drivers do this; it should be abstracted and made MP-safe.
2130 * (Hmm... where have we seen this warning before :-> GO )
2131 */
2132 static int
2133 raidlock(rs)
2134 struct raid_softc *rs;
2135 {
2136 int error;
2137
2138 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2139 rs->sc_flags |= RAIDF_WANTED;
2140 if ((error =
2141 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2142 return (error);
2143 }
2144 rs->sc_flags |= RAIDF_LOCKED;
2145 return (0);
2146 }
2147 /*
2148 * Unlock and wake up any waiters.
2149 */
2150 static void
2151 raidunlock(rs)
2152 struct raid_softc *rs;
2153 {
2154
2155 rs->sc_flags &= ~RAIDF_LOCKED;
2156 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2157 rs->sc_flags &= ~RAIDF_WANTED;
2158 wakeup(rs);
2159 }
2160 }
2161
2162
/*
 * Location of the on-disk component label, as used by the label
 * read/write routines below: byte offset from the start of the
 * component, and number of bytes transferred.
 */
#define RF_COMPONENT_INFO_OFFSET	16384	/* bytes */
#define RF_COMPONENT_INFO_SIZE		1024	/* bytes */
2165
2166 int
2167 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2168 {
2169 RF_ComponentLabel_t clabel;
2170 raidread_component_label(dev, b_vp, &clabel);
2171 clabel.mod_counter = mod_counter;
2172 clabel.clean = RF_RAID_CLEAN;
2173 raidwrite_component_label(dev, b_vp, &clabel);
2174 return(0);
2175 }
2176
2177
2178 int
2179 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2180 {
2181 RF_ComponentLabel_t clabel;
2182 raidread_component_label(dev, b_vp, &clabel);
2183 clabel.mod_counter = mod_counter;
2184 clabel.clean = RF_RAID_DIRTY;
2185 raidwrite_component_label(dev, b_vp, &clabel);
2186 return(0);
2187 }
2188
2189 /* ARGSUSED */
2190 int
2191 raidread_component_label(dev, b_vp, clabel)
2192 dev_t dev;
2193 struct vnode *b_vp;
2194 RF_ComponentLabel_t *clabel;
2195 {
2196 struct buf *bp;
2197 const struct bdevsw *bdev;
2198 int error;
2199
2200 /* XXX should probably ensure that we don't try to do this if
2201 someone has changed rf_protected_sectors. */
2202
2203 if (b_vp == NULL) {
2204 /* For whatever reason, this component is not valid.
2205 Don't try to read a component label from it. */
2206 return(EINVAL);
2207 }
2208
2209 /* get a block of the appropriate size... */
2210 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2211 bp->b_dev = dev;
2212
2213 /* get our ducks in a row for the read */
2214 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2215 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2216 bp->b_flags |= B_READ;
2217 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2218
2219 bdev = bdevsw_lookup(bp->b_dev);
2220 if (bdev == NULL)
2221 return (ENXIO);
2222 (*bdev->d_strategy)(bp);
2223
2224 error = biowait(bp);
2225
2226 if (!error) {
2227 memcpy(clabel, bp->b_data,
2228 sizeof(RF_ComponentLabel_t));
2229 #if 0
2230 rf_print_component_label( clabel );
2231 #endif
2232 } else {
2233 #if 0
2234 printf("Failed to read RAID component label!\n");
2235 #endif
2236 }
2237
2238 brelse(bp);
2239 return(error);
2240 }
2241 /* ARGSUSED */
2242 int
2243 raidwrite_component_label(dev, b_vp, clabel)
2244 dev_t dev;
2245 struct vnode *b_vp;
2246 RF_ComponentLabel_t *clabel;
2247 {
2248 struct buf *bp;
2249 const struct bdevsw *bdev;
2250 int error;
2251
2252 /* get a block of the appropriate size... */
2253 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2254 bp->b_dev = dev;
2255
2256 /* get our ducks in a row for the write */
2257 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2258 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2259 bp->b_flags |= B_WRITE;
2260 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2261
2262 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2263
2264 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2265
2266 bdev = bdevsw_lookup(bp->b_dev);
2267 if (bdev == NULL)
2268 return (ENXIO);
2269 (*bdev->d_strategy)(bp);
2270 error = biowait(bp);
2271 brelse(bp);
2272 if (error) {
2273 #if 1
2274 printf("Failed to write RAID component info!\n");
2275 #endif
2276 }
2277
2278 return(error);
2279 }
2280
2281 void
2282 rf_markalldirty(raidPtr)
2283 RF_Raid_t *raidPtr;
2284 {
2285 RF_ComponentLabel_t clabel;
2286 int r,c;
2287
2288 raidPtr->mod_counter++;
2289 for (r = 0; r < raidPtr->numRow; r++) {
2290 for (c = 0; c < raidPtr->numCol; c++) {
2291 /* we don't want to touch (at all) a disk that has
2292 failed */
2293 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2294 raidread_component_label(
2295 raidPtr->Disks[r][c].dev,
2296 raidPtr->raid_cinfo[r][c].ci_vp,
2297 &clabel);
2298 if (clabel.status == rf_ds_spared) {
2299 /* XXX do something special...
2300 but whatever you do, don't
2301 try to access it!! */
2302 } else {
2303 #if 0
2304 clabel.status =
2305 raidPtr->Disks[r][c].status;
2306 raidwrite_component_label(
2307 raidPtr->Disks[r][c].dev,
2308 raidPtr->raid_cinfo[r][c].ci_vp,
2309 &clabel);
2310 #endif
2311 raidmarkdirty(
2312 raidPtr->Disks[r][c].dev,
2313 raidPtr->raid_cinfo[r][c].ci_vp,
2314 raidPtr->mod_counter);
2315 }
2316 }
2317 }
2318 }
2319 /* printf("Component labels marked dirty.\n"); */
2320 #if 0
2321 for( c = 0; c < raidPtr->numSpare ; c++) {
2322 sparecol = raidPtr->numCol + c;
2323 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2324 /*
2325
2326 XXX this is where we get fancy and map this spare
2327 into it's correct spot in the array.
2328
2329 */
2330 /*
2331
2332 we claim this disk is "optimal" if it's
2333 rf_ds_used_spare, as that means it should be
2334 directly substitutable for the disk it replaced.
2335 We note that too...
2336
2337 */
2338
2339 for(i=0;i<raidPtr->numRow;i++) {
2340 for(j=0;j<raidPtr->numCol;j++) {
2341 if ((raidPtr->Disks[i][j].spareRow ==
2342 r) &&
2343 (raidPtr->Disks[i][j].spareCol ==
2344 sparecol)) {
2345 srow = r;
2346 scol = sparecol;
2347 break;
2348 }
2349 }
2350 }
2351
2352 raidread_component_label(
2353 raidPtr->Disks[r][sparecol].dev,
2354 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2355 &clabel);
2356 /* make sure status is noted */
2357 clabel.version = RF_COMPONENT_LABEL_VERSION;
2358 clabel.mod_counter = raidPtr->mod_counter;
2359 clabel.serial_number = raidPtr->serial_number;
2360 clabel.row = srow;
2361 clabel.column = scol;
2362 clabel.num_rows = raidPtr->numRow;
2363 clabel.num_columns = raidPtr->numCol;
2364 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2365 clabel.status = rf_ds_optimal;
2366 raidwrite_component_label(
2367 raidPtr->Disks[r][sparecol].dev,
2368 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2369 &clabel);
2370 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2371 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2372 }
2373 }
2374
2375 #endif
2376 }
2377
2378
2379 void
2380 rf_update_component_labels(raidPtr, final)
2381 RF_Raid_t *raidPtr;
2382 int final;
2383 {
2384 RF_ComponentLabel_t clabel;
2385 int sparecol;
2386 int r,c;
2387 int i,j;
2388 int srow, scol;
2389
2390 srow = -1;
2391 scol = -1;
2392
2393 /* XXX should do extra checks to make sure things really are clean,
2394 rather than blindly setting the clean bit... */
2395
2396 raidPtr->mod_counter++;
2397
2398 for (r = 0; r < raidPtr->numRow; r++) {
2399 for (c = 0; c < raidPtr->numCol; c++) {
2400 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2401 raidread_component_label(
2402 raidPtr->Disks[r][c].dev,
2403 raidPtr->raid_cinfo[r][c].ci_vp,
2404 &clabel);
2405 /* make sure status is noted */
2406 clabel.status = rf_ds_optimal;
2407 /* bump the counter */
2408 clabel.mod_counter = raidPtr->mod_counter;
2409
2410 raidwrite_component_label(
2411 raidPtr->Disks[r][c].dev,
2412 raidPtr->raid_cinfo[r][c].ci_vp,
2413 &clabel);
2414 if (final == RF_FINAL_COMPONENT_UPDATE) {
2415 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2416 raidmarkclean(
2417 raidPtr->Disks[r][c].dev,
2418 raidPtr->raid_cinfo[r][c].ci_vp,
2419 raidPtr->mod_counter);
2420 }
2421 }
2422 }
2423 /* else we don't touch it.. */
2424 }
2425 }
2426
2427 for( c = 0; c < raidPtr->numSpare ; c++) {
2428 sparecol = raidPtr->numCol + c;
2429 /* Need to ensure that the reconstruct actually completed! */
2430 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2431 /*
2432
2433 we claim this disk is "optimal" if it's
2434 rf_ds_used_spare, as that means it should be
2435 directly substitutable for the disk it replaced.
2436 We note that too...
2437
2438 */
2439
2440 for(i=0;i<raidPtr->numRow;i++) {
2441 for(j=0;j<raidPtr->numCol;j++) {
2442 if ((raidPtr->Disks[i][j].spareRow ==
2443 0) &&
2444 (raidPtr->Disks[i][j].spareCol ==
2445 sparecol)) {
2446 srow = i;
2447 scol = j;
2448 break;
2449 }
2450 }
2451 }
2452
2453 /* XXX shouldn't *really* need this... */
2454 raidread_component_label(
2455 raidPtr->Disks[0][sparecol].dev,
2456 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2457 &clabel);
2458 /* make sure status is noted */
2459
2460 raid_init_component_label(raidPtr, &clabel);
2461
2462 clabel.mod_counter = raidPtr->mod_counter;
2463 clabel.row = srow;
2464 clabel.column = scol;
2465 clabel.status = rf_ds_optimal;
2466
2467 raidwrite_component_label(
2468 raidPtr->Disks[0][sparecol].dev,
2469 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2470 &clabel);
2471 if (final == RF_FINAL_COMPONENT_UPDATE) {
2472 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2473 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2474 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2475 raidPtr->mod_counter);
2476 }
2477 }
2478 }
2479 }
2480 /* printf("Component labels updated\n"); */
2481 }
2482
2483 void
2484 rf_close_component(raidPtr, vp, auto_configured)
2485 RF_Raid_t *raidPtr;
2486 struct vnode *vp;
2487 int auto_configured;
2488 {
2489 struct proc *p;
2490
2491 p = raidPtr->engine_thread;
2492
2493 if (vp != NULL) {
2494 if (auto_configured == 1) {
2495 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2496 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2497 vput(vp);
2498
2499 } else {
2500 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2501 }
2502 } else {
2503 #if 0
2504 printf("vnode was NULL\n");
2505 #endif
2506 }
2507 }
2508
2509
2510 void
2511 rf_UnconfigureVnodes(raidPtr)
2512 RF_Raid_t *raidPtr;
2513 {
2514 int r,c;
2515 struct proc *p;
2516 struct vnode *vp;
2517 int acd;
2518
2519
2520 /* We take this opportunity to close the vnodes like we should.. */
2521
2522 p = raidPtr->engine_thread;
2523
2524 for (r = 0; r < raidPtr->numRow; r++) {
2525 for (c = 0; c < raidPtr->numCol; c++) {
2526 #if 0
2527 printf("raid%d: Closing vnode for row: %d col: %d\n",
2528 raidPtr->raidid, r, c);
2529 #endif
2530 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2531 acd = raidPtr->Disks[r][c].auto_configured;
2532 rf_close_component(raidPtr, vp, acd);
2533 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2534 raidPtr->Disks[r][c].auto_configured = 0;
2535 }
2536 }
2537 for (r = 0; r < raidPtr->numSpare; r++) {
2538 #if 0
2539 printf("raid%d: Closing vnode for spare: %d\n",
2540 raidPtr->raidid, r);
2541 #endif
2542 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2543 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2544 rf_close_component(raidPtr, vp, acd);
2545 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2546 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2547 }
2548 }
2549
2550
2551 void
2552 rf_ReconThread(req)
2553 struct rf_recon_req *req;
2554 {
2555 int s;
2556 RF_Raid_t *raidPtr;
2557
2558 s = splbio();
2559 raidPtr = (RF_Raid_t *) req->raidPtr;
2560 raidPtr->recon_in_progress = 1;
2561
2562 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2563 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2564
2565 /* XXX get rid of this! we don't need it at all.. */
2566 RF_Free(req, sizeof(*req));
2567
2568 raidPtr->recon_in_progress = 0;
2569 splx(s);
2570
2571 /* That's all... */
2572 kthread_exit(0); /* does not return */
2573 }
2574
2575 void
2576 rf_RewriteParityThread(raidPtr)
2577 RF_Raid_t *raidPtr;
2578 {
2579 int retcode;
2580 int s;
2581
2582 raidPtr->parity_rewrite_in_progress = 1;
2583 s = splbio();
2584 retcode = rf_RewriteParity(raidPtr);
2585 splx(s);
2586 if (retcode) {
2587 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2588 } else {
2589 /* set the clean bit! If we shutdown correctly,
2590 the clean bit on each component label will get
2591 set */
2592 raidPtr->parity_good = RF_RAID_CLEAN;
2593 }
2594 raidPtr->parity_rewrite_in_progress = 0;
2595
2596 /* Anyone waiting for us to stop? If so, inform them... */
2597 if (raidPtr->waitShutdown) {
2598 wakeup(&raidPtr->parity_rewrite_in_progress);
2599 }
2600
2601 /* That's all... */
2602 kthread_exit(0); /* does not return */
2603 }
2604
2605
2606 void
2607 rf_CopybackThread(raidPtr)
2608 RF_Raid_t *raidPtr;
2609 {
2610 int s;
2611
2612 raidPtr->copyback_in_progress = 1;
2613 s = splbio();
2614 rf_CopybackReconstructedData(raidPtr);
2615 splx(s);
2616 raidPtr->copyback_in_progress = 0;
2617
2618 /* That's all... */
2619 kthread_exit(0); /* does not return */
2620 }
2621
2622
2623 void
2624 rf_ReconstructInPlaceThread(req)
2625 struct rf_recon_req *req;
2626 {
2627 int retcode;
2628 int s;
2629 RF_Raid_t *raidPtr;
2630
2631 s = splbio();
2632 raidPtr = req->raidPtr;
2633 raidPtr->recon_in_progress = 1;
2634 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2635 RF_Free(req, sizeof(*req));
2636 raidPtr->recon_in_progress = 0;
2637 splx(s);
2638
2639 /* That's all... */
2640 kthread_exit(0); /* does not return */
2641 }
2642
2643 RF_AutoConfig_t *
2644 rf_find_raid_components()
2645 {
2646 struct vnode *vp;
2647 struct disklabel label;
2648 struct device *dv;
2649 dev_t dev;
2650 int bmajor;
2651 int error;
2652 int i;
2653 int good_one;
2654 RF_ComponentLabel_t *clabel;
2655 RF_AutoConfig_t *ac_list;
2656 RF_AutoConfig_t *ac;
2657
2658
2659 /* initialize the AutoConfig list */
2660 ac_list = NULL;
2661
2662 /* we begin by trolling through *all* the devices on the system */
2663
2664 for (dv = alldevs.tqh_first; dv != NULL;
2665 dv = dv->dv_list.tqe_next) {
2666
2667 /* we are only interested in disks... */
2668 if (dv->dv_class != DV_DISK)
2669 continue;
2670
2671 /* we don't care about floppies... */
2672 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2673 continue;
2674 }
2675
2676 /* we don't care about CD's... */
2677 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2678 continue;
2679 }
2680
2681 /* hdfd is the Atari/Hades floppy driver */
2682 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2683 continue;
2684 }
2685 /* fdisa is the Atari/Milan floppy driver */
2686 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2687 continue;
2688 }
2689
2690 /* need to find the device_name_to_block_device_major stuff */
2691 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2692
2693 /* get a vnode for the raw partition of this disk */
2694
2695 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2696 if (bdevvp(dev, &vp))
2697 panic("RAID can't alloc vnode");
2698
2699 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2700
2701 if (error) {
2702 /* "Who cares." Continue looking
2703 for something that exists*/
2704 vput(vp);
2705 continue;
2706 }
2707
2708 /* Ok, the disk exists. Go get the disklabel. */
2709 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2710 FREAD, NOCRED, 0);
2711 if (error) {
2712 /*
2713 * XXX can't happen - open() would
2714 * have errored out (or faked up one)
2715 */
2716 printf("can't get label for dev %s%c (%d)!?!?\n",
2717 dv->dv_xname, 'a' + RAW_PART, error);
2718 }
2719
2720 /* don't need this any more. We'll allocate it again
2721 a little later if we really do... */
2722 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2723 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2724 vput(vp);
2725
2726 for (i=0; i < label.d_npartitions; i++) {
2727 /* We only support partitions marked as RAID */
2728 if (label.d_partitions[i].p_fstype != FS_RAID)
2729 continue;
2730
2731 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2732 if (bdevvp(dev, &vp))
2733 panic("RAID can't alloc vnode");
2734
2735 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2736 if (error) {
2737 /* Whatever... */
2738 vput(vp);
2739 continue;
2740 }
2741
2742 good_one = 0;
2743
2744 clabel = (RF_ComponentLabel_t *)
2745 malloc(sizeof(RF_ComponentLabel_t),
2746 M_RAIDFRAME, M_NOWAIT);
2747 if (clabel == NULL) {
2748 /* XXX CLEANUP HERE */
2749 printf("RAID auto config: out of memory!\n");
2750 return(NULL); /* XXX probably should panic? */
2751 }
2752
2753 if (!raidread_component_label(dev, vp, clabel)) {
2754 /* Got the label. Does it look reasonable? */
2755 if (rf_reasonable_label(clabel) &&
2756 (clabel->partitionSize <=
2757 label.d_partitions[i].p_size)) {
2758 #if DEBUG
2759 printf("Component on: %s%c: %d\n",
2760 dv->dv_xname, 'a'+i,
2761 label.d_partitions[i].p_size);
2762 rf_print_component_label(clabel);
2763 #endif
2764 /* if it's reasonable, add it,
2765 else ignore it. */
2766 ac = (RF_AutoConfig_t *)
2767 malloc(sizeof(RF_AutoConfig_t),
2768 M_RAIDFRAME,
2769 M_NOWAIT);
2770 if (ac == NULL) {
2771 /* XXX should panic?? */
2772 return(NULL);
2773 }
2774
2775 sprintf(ac->devname, "%s%c",
2776 dv->dv_xname, 'a'+i);
2777 ac->dev = dev;
2778 ac->vp = vp;
2779 ac->clabel = clabel;
2780 ac->next = ac_list;
2781 ac_list = ac;
2782 good_one = 1;
2783 }
2784 }
2785 if (!good_one) {
2786 /* cleanup */
2787 free(clabel, M_RAIDFRAME);
2788 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2789 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2790 vput(vp);
2791 }
2792 }
2793 }
2794 return(ac_list);
2795 }
2796
2797 static int
2798 rf_reasonable_label(clabel)
2799 RF_ComponentLabel_t *clabel;
2800 {
2801
2802 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2803 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2804 ((clabel->clean == RF_RAID_CLEAN) ||
2805 (clabel->clean == RF_RAID_DIRTY)) &&
2806 clabel->row >=0 &&
2807 clabel->column >= 0 &&
2808 clabel->num_rows > 0 &&
2809 clabel->num_columns > 0 &&
2810 clabel->row < clabel->num_rows &&
2811 clabel->column < clabel->num_columns &&
2812 clabel->blockSize > 0 &&
2813 clabel->numBlocks > 0) {
2814 /* label looks reasonable enough... */
2815 return(1);
2816 }
2817 return(0);
2818 }
2819
2820
#if 0
/*
 * Debugging aid (currently compiled out): dump the interesting fields
 * of a component label to the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean, *is_auto, *is_root;

	is_clean = clabel->clean ? "Yes" : "No";
	is_auto = clabel->autoconfigure ? "Yes" : "No";
	is_root = clabel->root_partition ? "Yes" : "No";

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status );
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", is_auto );
	printf(" Contains root partition: %s\n", is_root );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif
}
#endif
2849
2850 RF_ConfigSet_t *
2851 rf_create_auto_sets(ac_list)
2852 RF_AutoConfig_t *ac_list;
2853 {
2854 RF_AutoConfig_t *ac;
2855 RF_ConfigSet_t *config_sets;
2856 RF_ConfigSet_t *cset;
2857 RF_AutoConfig_t *ac_next;
2858
2859
2860 config_sets = NULL;
2861
2862 /* Go through the AutoConfig list, and figure out which components
2863 belong to what sets. */
2864 ac = ac_list;
2865 while(ac!=NULL) {
2866 /* we're going to putz with ac->next, so save it here
2867 for use at the end of the loop */
2868 ac_next = ac->next;
2869
2870 if (config_sets == NULL) {
2871 /* will need at least this one... */
2872 config_sets = (RF_ConfigSet_t *)
2873 malloc(sizeof(RF_ConfigSet_t),
2874 M_RAIDFRAME, M_NOWAIT);
2875 if (config_sets == NULL) {
2876 panic("rf_create_auto_sets: No memory!\n");
2877 }
2878 /* this one is easy :) */
2879 config_sets->ac = ac;
2880 config_sets->next = NULL;
2881 config_sets->rootable = 0;
2882 ac->next = NULL;
2883 } else {
2884 /* which set does this component fit into? */
2885 cset = config_sets;
2886 while(cset!=NULL) {
2887 if (rf_does_it_fit(cset, ac)) {
2888 /* looks like it matches... */
2889 ac->next = cset->ac;
2890 cset->ac = ac;
2891 break;
2892 }
2893 cset = cset->next;
2894 }
2895 if (cset==NULL) {
2896 /* didn't find a match above... new set..*/
2897 cset = (RF_ConfigSet_t *)
2898 malloc(sizeof(RF_ConfigSet_t),
2899 M_RAIDFRAME, M_NOWAIT);
2900 if (cset == NULL) {
2901 panic("rf_create_auto_sets: No memory!\n");
2902 }
2903 cset->ac = ac;
2904 ac->next = NULL;
2905 cset->next = config_sets;
2906 cset->rootable = 0;
2907 config_sets = cset;
2908 }
2909 }
2910 ac = ac_next;
2911 }
2912
2913
2914 return(config_sets);
2915 }
2916
2917 static int
2918 rf_does_it_fit(cset, ac)
2919 RF_ConfigSet_t *cset;
2920 RF_AutoConfig_t *ac;
2921 {
2922 RF_ComponentLabel_t *clabel1, *clabel2;
2923
2924 /* If this one matches the *first* one in the set, that's good
2925 enough, since the other members of the set would have been
2926 through here too... */
2927 /* note that we are not checking partitionSize here..
2928
2929 Note that we are also not checking the mod_counters here.
2930 If everything else matches execpt the mod_counter, that's
2931 good enough for this test. We will deal with the mod_counters
2932 a little later in the autoconfiguration process.
2933
2934 (clabel1->mod_counter == clabel2->mod_counter) &&
2935
2936 The reason we don't check for this is that failed disks
2937 will have lower modification counts. If those disks are
2938 not added to the set they used to belong to, then they will
2939 form their own set, which may result in 2 different sets,
2940 for example, competing to be configured at raid0, and
2941 perhaps competing to be the root filesystem set. If the
2942 wrong ones get configured, or both attempt to become /,
2943 weird behaviour and or serious lossage will occur. Thus we
2944 need to bring them into the fold here, and kick them out at
2945 a later point.
2946
2947 */
2948
2949 clabel1 = cset->ac->clabel;
2950 clabel2 = ac->clabel;
2951 if ((clabel1->version == clabel2->version) &&
2952 (clabel1->serial_number == clabel2->serial_number) &&
2953 (clabel1->num_rows == clabel2->num_rows) &&
2954 (clabel1->num_columns == clabel2->num_columns) &&
2955 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2956 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2957 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2958 (clabel1->parityConfig == clabel2->parityConfig) &&
2959 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2960 (clabel1->blockSize == clabel2->blockSize) &&
2961 (clabel1->numBlocks == clabel2->numBlocks) &&
2962 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2963 (clabel1->root_partition == clabel2->root_partition) &&
2964 (clabel1->last_unit == clabel2->last_unit) &&
2965 (clabel1->config_order == clabel2->config_order)) {
2966 /* if it get's here, it almost *has* to be a match */
2967 } else {
2968 /* it's not consistent with somebody in the set..
2969 punt */
2970 return(0);
2971 }
2972 /* all was fine.. it must fit... */
2973 return(1);
2974 }
2975
2976 int
2977 rf_have_enough_components(cset)
2978 RF_ConfigSet_t *cset;
2979 {
2980 RF_AutoConfig_t *ac;
2981 RF_AutoConfig_t *auto_config;
2982 RF_ComponentLabel_t *clabel;
2983 int r,c;
2984 int num_rows;
2985 int num_cols;
2986 int num_missing;
2987 int mod_counter;
2988 int mod_counter_found;
2989 int even_pair_failed;
2990 char parity_type;
2991
2992
2993 /* check to see that we have enough 'live' components
2994 of this set. If so, we can configure it if necessary */
2995
2996 num_rows = cset->ac->clabel->num_rows;
2997 num_cols = cset->ac->clabel->num_columns;
2998 parity_type = cset->ac->clabel->parityConfig;
2999
3000 /* XXX Check for duplicate components!?!?!? */
3001
3002 /* Determine what the mod_counter is supposed to be for this set. */
3003
3004 mod_counter_found = 0;
3005 mod_counter = 0;
3006 ac = cset->ac;
3007 while(ac!=NULL) {
3008 if (mod_counter_found==0) {
3009 mod_counter = ac->clabel->mod_counter;
3010 mod_counter_found = 1;
3011 } else {
3012 if (ac->clabel->mod_counter > mod_counter) {
3013 mod_counter = ac->clabel->mod_counter;
3014 }
3015 }
3016 ac = ac->next;
3017 }
3018
3019 num_missing = 0;
3020 auto_config = cset->ac;
3021
3022 for(r=0; r<num_rows; r++) {
3023 even_pair_failed = 0;
3024 for(c=0; c<num_cols; c++) {
3025 ac = auto_config;
3026 while(ac!=NULL) {
3027 if ((ac->clabel->row == r) &&
3028 (ac->clabel->column == c) &&
3029 (ac->clabel->mod_counter == mod_counter)) {
3030 /* it's this one... */
3031 #if DEBUG
3032 printf("Found: %s at %d,%d\n",
3033 ac->devname,r,c);
3034 #endif
3035 break;
3036 }
3037 ac=ac->next;
3038 }
3039 if (ac==NULL) {
3040 /* Didn't find one here! */
3041 /* special case for RAID 1, especially
3042 where there are more than 2
3043 components (where RAIDframe treats
3044 things a little differently :( ) */
3045 if (parity_type == '1') {
3046 if (c%2 == 0) { /* even component */
3047 even_pair_failed = 1;
3048 } else { /* odd component. If
3049 we're failed, and
3050 so is the even
3051 component, it's
3052 "Good Night, Charlie" */
3053 if (even_pair_failed == 1) {
3054 return(0);
3055 }
3056 }
3057 } else {
3058 /* normal accounting */
3059 num_missing++;
3060 }
3061 }
3062 if ((parity_type == '1') && (c%2 == 1)) {
3063 /* Just did an even component, and we didn't
3064 bail.. reset the even_pair_failed flag,
3065 and go on to the next component.... */
3066 even_pair_failed = 0;
3067 }
3068 }
3069 }
3070
3071 clabel = cset->ac->clabel;
3072
3073 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3074 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3075 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3076 /* XXX this needs to be made *much* more general */
3077 /* Too many failures */
3078 return(0);
3079 }
3080 /* otherwise, all is well, and we've got enough to take a kick
3081 at autoconfiguring this set */
3082 return(1);
3083 }
3084
3085 void
3086 rf_create_configuration(ac,config,raidPtr)
3087 RF_AutoConfig_t *ac;
3088 RF_Config_t *config;
3089 RF_Raid_t *raidPtr;
3090 {
3091 RF_ComponentLabel_t *clabel;
3092 int i;
3093
3094 clabel = ac->clabel;
3095
3096 /* 1. Fill in the common stuff */
3097 config->numRow = clabel->num_rows;
3098 config->numCol = clabel->num_columns;
3099 config->numSpare = 0; /* XXX should this be set here? */
3100 config->sectPerSU = clabel->sectPerSU;
3101 config->SUsPerPU = clabel->SUsPerPU;
3102 config->SUsPerRU = clabel->SUsPerRU;
3103 config->parityConfig = clabel->parityConfig;
3104 /* XXX... */
3105 strcpy(config->diskQueueType,"fifo");
3106 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3107 config->layoutSpecificSize = 0; /* XXX ?? */
3108
3109 while(ac!=NULL) {
3110 /* row/col values will be in range due to the checks
3111 in reasonable_label() */
3112 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3113 ac->devname);
3114 ac = ac->next;
3115 }
3116
3117 for(i=0;i<RF_MAXDBGV;i++) {
3118 config->debugVars[i][0] = NULL;
3119 }
3120 }
3121
3122 int
3123 rf_set_autoconfig(raidPtr, new_value)
3124 RF_Raid_t *raidPtr;
3125 int new_value;
3126 {
3127 RF_ComponentLabel_t clabel;
3128 struct vnode *vp;
3129 dev_t dev;
3130 int row, column;
3131
3132 raidPtr->autoconfigure = new_value;
3133 for(row=0; row<raidPtr->numRow; row++) {
3134 for(column=0; column<raidPtr->numCol; column++) {
3135 if (raidPtr->Disks[row][column].status ==
3136 rf_ds_optimal) {
3137 dev = raidPtr->Disks[row][column].dev;
3138 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3139 raidread_component_label(dev, vp, &clabel);
3140 clabel.autoconfigure = new_value;
3141 raidwrite_component_label(dev, vp, &clabel);
3142 }
3143 }
3144 }
3145 return(new_value);
3146 }
3147
3148 int
3149 rf_set_rootpartition(raidPtr, new_value)
3150 RF_Raid_t *raidPtr;
3151 int new_value;
3152 {
3153 RF_ComponentLabel_t clabel;
3154 struct vnode *vp;
3155 dev_t dev;
3156 int row, column;
3157
3158 raidPtr->root_partition = new_value;
3159 for(row=0; row<raidPtr->numRow; row++) {
3160 for(column=0; column<raidPtr->numCol; column++) {
3161 if (raidPtr->Disks[row][column].status ==
3162 rf_ds_optimal) {
3163 dev = raidPtr->Disks[row][column].dev;
3164 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3165 raidread_component_label(dev, vp, &clabel);
3166 clabel.root_partition = new_value;
3167 raidwrite_component_label(dev, vp, &clabel);
3168 }
3169 }
3170 }
3171 return(new_value);
3172 }
3173
3174 void
3175 rf_release_all_vps(cset)
3176 RF_ConfigSet_t *cset;
3177 {
3178 RF_AutoConfig_t *ac;
3179
3180 ac = cset->ac;
3181 while(ac!=NULL) {
3182 /* Close the vp, and give it back */
3183 if (ac->vp) {
3184 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3185 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3186 vput(ac->vp);
3187 ac->vp = NULL;
3188 }
3189 ac = ac->next;
3190 }
3191 }
3192
3193
3194 void
3195 rf_cleanup_config_set(cset)
3196 RF_ConfigSet_t *cset;
3197 {
3198 RF_AutoConfig_t *ac;
3199 RF_AutoConfig_t *next_ac;
3200
3201 ac = cset->ac;
3202 while(ac!=NULL) {
3203 next_ac = ac->next;
3204 /* nuke the label */
3205 free(ac->clabel, M_RAIDFRAME);
3206 /* cleanup the config structure */
3207 free(ac, M_RAIDFRAME);
3208 /* "next.." */
3209 ac = next_ac;
3210 }
3211 /* and, finally, nuke the config set */
3212 free(cset, M_RAIDFRAME);
3213 }
3214
3215
3216 void
3217 raid_init_component_label(raidPtr, clabel)
3218 RF_Raid_t *raidPtr;
3219 RF_ComponentLabel_t *clabel;
3220 {
3221 /* current version number */
3222 clabel->version = RF_COMPONENT_LABEL_VERSION;
3223 clabel->serial_number = raidPtr->serial_number;
3224 clabel->mod_counter = raidPtr->mod_counter;
3225 clabel->num_rows = raidPtr->numRow;
3226 clabel->num_columns = raidPtr->numCol;
3227 clabel->clean = RF_RAID_DIRTY; /* not clean */
3228 clabel->status = rf_ds_optimal; /* "It's good!" */
3229
3230 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3231 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3232 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3233
3234 clabel->blockSize = raidPtr->bytesPerSector;
3235 clabel->numBlocks = raidPtr->sectorsPerDisk;
3236
3237 /* XXX not portable */
3238 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3239 clabel->maxOutstanding = raidPtr->maxOutstanding;
3240 clabel->autoconfigure = raidPtr->autoconfigure;
3241 clabel->root_partition = raidPtr->root_partition;
3242 clabel->last_unit = raidPtr->raidid;
3243 clabel->config_order = raidPtr->config_order;
3244 }
3245
3246 int
3247 rf_auto_config_set(cset,unit)
3248 RF_ConfigSet_t *cset;
3249 int *unit;
3250 {
3251 RF_Raid_t *raidPtr;
3252 RF_Config_t *config;
3253 int raidID;
3254 int retcode;
3255
3256 #if DEBUG
3257 printf("RAID autoconfigure\n");
3258 #endif
3259
3260 retcode = 0;
3261 *unit = -1;
3262
3263 /* 1. Create a config structure */
3264
3265 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3266 M_RAIDFRAME,
3267 M_NOWAIT);
3268 if (config==NULL) {
3269 printf("Out of mem!?!?\n");
3270 /* XXX do something more intelligent here. */
3271 return(1);
3272 }
3273
3274 memset(config, 0, sizeof(RF_Config_t));
3275
3276 /*
3277 2. Figure out what RAID ID this one is supposed to live at
3278 See if we can get the same RAID dev that it was configured
3279 on last time..
3280 */
3281
3282 raidID = cset->ac->clabel->last_unit;
3283 if ((raidID < 0) || (raidID >= numraid)) {
3284 /* let's not wander off into lala land. */
3285 raidID = numraid - 1;
3286 }
3287 if (raidPtrs[raidID]->valid != 0) {
3288
3289 /*
3290 Nope... Go looking for an alternative...
3291 Start high so we don't immediately use raid0 if that's
3292 not taken.
3293 */
3294
3295 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3296 if (raidPtrs[raidID]->valid == 0) {
3297 /* can use this one! */
3298 break;
3299 }
3300 }
3301 }
3302
3303 if (raidID < 0) {
3304 /* punt... */
3305 printf("Unable to auto configure this set!\n");
3306 printf("(Out of RAID devs!)\n");
3307 return(1);
3308 }
3309
3310 #if DEBUG
3311 printf("Configuring raid%d:\n",raidID);
3312 #endif
3313
3314 raidPtr = raidPtrs[raidID];
3315
3316 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3317 raidPtr->raidid = raidID;
3318 raidPtr->openings = RAIDOUTSTANDING;
3319
3320 /* 3. Build the configuration structure */
3321 rf_create_configuration(cset->ac, config, raidPtr);
3322
3323 /* 4. Do the configuration */
3324 retcode = rf_Configure(raidPtr, config, cset->ac);
3325
3326 if (retcode == 0) {
3327
3328 raidinit(raidPtrs[raidID]);
3329
3330 rf_markalldirty(raidPtrs[raidID]);
3331 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3332 if (cset->ac->clabel->root_partition==1) {
3333 /* everything configured just fine. Make a note
3334 that this set is eligible to be root. */
3335 cset->rootable = 1;
3336 /* XXX do this here? */
3337 raidPtrs[raidID]->root_partition = 1;
3338 }
3339 }
3340
3341 /* 5. Cleanup */
3342 free(config, M_RAIDFRAME);
3343
3344 *unit = raidID;
3345 return(retcode);
3346 }
3347
3348 void
3349 rf_disk_unbusy(desc)
3350 RF_RaidAccessDesc_t *desc;
3351 {
3352 struct buf *bp;
3353
3354 bp = (struct buf *)desc->bp;
3355 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3356 (bp->b_bcount - bp->b_resid));
3357 }
3358