rf_netbsdkintf.c revision 1.107 1 /* $NetBSD: rf_netbsdkintf.c,v 1.107 2001/07/16 15:50:27 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_desc.h"
146 #include "rf_diskqueue.h"
147 #include "rf_acctrace.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_debugMem.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_debugprint.h"
156 #include "rf_threadstuff.h"
157 #include "rf_configure.h"
158
/* Debug verbosity: db1_printf() only prints when this is greater than 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list.  Both
 * variants expand to a single do { } while (0) statement so the macro is
 * safe as the sole body of an if/else: the previous forms (a bare `if'
 * in the DEBUG case, `{ }' otherwise) either captured a following `else'
 * or failed to compile in front of one.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit(RF_Raid_t *);
184
185 void raidattach(int);
186 int raidsize(dev_t);
187 int raidopen(dev_t, int, int, struct proc *);
188 int raidclose(dev_t, int, int, struct proc *);
189 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
190 int raidwrite(dev_t, struct uio *, int);
191 int raidread(dev_t, struct uio *, int);
192 void raidstrategy(struct buf *);
193 int raiddump(dev_t, daddr_t, caddr_t, size_t);
194
195 /*
196 * Pilfered from ccd.c
197 */
198
199 struct raidbuf {
200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
201 struct buf *rf_obp; /* ptr. to original I/O buf */
202 int rf_flags; /* misc. flags */
203 RF_DiskQueueData_t *req;/* the request that this was part of.. */
204 };
205
206
/* Take a buffer from / hand a buffer back to a unit's component buffer pool. */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
209
210 /* XXX Not sure if the following should be replacing the raidPtrs above,
211 or if it should be used in conjunction with that...
212 */
213
214 struct raid_softc {
215 int sc_flags; /* flags */
216 int sc_cflags; /* configuration flags */
217 size_t sc_size; /* size of the raid device */
218 char sc_xname[20]; /* XXX external name */
219 struct disk sc_dkdev; /* generic disk device info */
220 struct pool sc_cbufpool; /* component buffer pool */
221 struct buf_queue buf_queue; /* used for the device queue */
222 };
/* Bits stored in raid_softc.sc_flags. */
#define	RAIDF_INITED	0x01	/* unit has been initialized */
#define	RAIDF_WLABEL	0x02	/* label area is writable */
#define	RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define	RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define	RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define	raidunit(x)	DISKUNIT(x)

/* Number of usable units; set by raidattach(). */
int	numraid = 0;
232
233 /*
234 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
235 * Be aware that large numbers can allow the driver to consume a lot of
236 * kernel memory, especially on writes, and in degraded mode reads.
237 *
238 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
239 * a single 64K write will typically require 64K for the old data,
240 * 64K for the old parity, and 64K for the new parity, for a total
241 * of 192K (if the parity buffer is not re-used immediately).
242 * Even if it is used immediately, that's still 128K, which when multiplied
243 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
244 *
245 * Now in degraded mode, for example, a 64K read on the above setup may
246 * require data reconstruction, which will require *all* of the 4 remaining
247 * disks to participate -- 4 * 32K/disk == 128K again.
248 */
249
/* Default number of simultaneous I/Os per RAID device; may be predefined. */
#ifndef RAIDOUTSTANDING
#define	RAIDOUTSTANDING	6
#endif

/* dev_t of the raw partition on the same major/unit as `dev'. */
#define	RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
256
257 /* declared here, and made public, for the benefit of KVM stuff.. */
258 struct raid_softc *raid_softc;
259
260 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
261 struct disklabel *);
262 static void raidgetdisklabel(dev_t);
263 static void raidmakedisklabel(struct raid_softc *);
264
265 static int raidlock(struct raid_softc *);
266 static void raidunlock(struct raid_softc *);
267
268 static void rf_markalldirty(RF_Raid_t *);
269 void rf_mountroot_hook(struct device *);
270
271 struct device *raidrootdev;
272
273 void rf_ReconThread(struct rf_recon_req *);
274 /* XXX what I want is: */
275 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
276 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
277 void rf_CopybackThread(RF_Raid_t *raidPtr);
278 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
279 void rf_buildroothack(void *);
280
281 RF_AutoConfig_t *rf_find_raid_components(void);
282 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
283 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
284 static int rf_reasonable_label(RF_ComponentLabel_t *);
285 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
286 int rf_set_autoconfig(RF_Raid_t *, int);
287 int rf_set_rootpartition(RF_Raid_t *, int);
288 void rf_release_all_vps(RF_ConfigSet_t *);
289 void rf_cleanup_config_set(RF_ConfigSet_t *);
290 int rf_have_enough_components(RF_ConfigSet_t *);
291 int rf_auto_config_set(RF_ConfigSet_t *, int *);
292
293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
294 allow autoconfig to take place.
295 Note that this is overridden by having
296 RAID_AUTOCONFIG as an option in the
297 kernel config file. */
298
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305 RF_AutoConfig_t *ac_list; /* autoconfig list */
306 RF_ConfigSet_t *config_sets;
307
308 #ifdef DEBUG
309 printf("raidattach: Asked for %d units\n", num);
310 #endif
311
312 if (num <= 0) {
313 #ifdef DIAGNOSTIC
314 panic("raidattach: count <= 0");
315 #endif
316 return;
317 }
318 /* This is where all the initialization stuff gets done. */
319
320 numraid = num;
321
322 /* Make some space for requested number of units... */
323
324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
325 if (raidPtrs == NULL) {
326 panic("raidPtrs is NULL!!\n");
327 }
328
329 rc = rf_mutex_init(&rf_sparet_wait_mutex);
330 if (rc) {
331 RF_PANIC();
332 }
333
334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
335
336 for (i = 0; i < num; i++)
337 raidPtrs[i] = NULL;
338 rc = rf_BootRaidframe();
339 if (rc == 0)
340 printf("Kernelized RAIDframe activated\n");
341 else
342 panic("Serious error booting RAID!!\n");
343
344 /* put together some datastructures like the CCD device does.. This
345 * lets us lock the device and what-not when it gets opened. */
346
347 raid_softc = (struct raid_softc *)
348 malloc(num * sizeof(struct raid_softc),
349 M_RAIDFRAME, M_NOWAIT);
350 if (raid_softc == NULL) {
351 printf("WARNING: no memory for RAIDframe driver\n");
352 return;
353 }
354
355 bzero(raid_softc, num * sizeof(struct raid_softc));
356
357 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
358 M_RAIDFRAME, M_NOWAIT);
359 if (raidrootdev == NULL) {
360 panic("No memory for RAIDframe driver!!?!?!\n");
361 }
362
363 for (raidID = 0; raidID < num; raidID++) {
364 BUFQ_INIT(&raid_softc[raidID].buf_queue);
365
366 raidrootdev[raidID].dv_class = DV_DISK;
367 raidrootdev[raidID].dv_cfdata = NULL;
368 raidrootdev[raidID].dv_unit = raidID;
369 raidrootdev[raidID].dv_parent = NULL;
370 raidrootdev[raidID].dv_flags = 0;
371 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
372
373 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
374 (RF_Raid_t *));
375 if (raidPtrs[raidID] == NULL) {
376 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
377 numraid = raidID;
378 return;
379 }
380 }
381
382 #if RAID_AUTOCONFIG
383 raidautoconfig = 1;
384 #endif
385
386 if (raidautoconfig) {
387 /* 1. locate all RAID components on the system */
388
389 #if DEBUG
390 printf("Searching for raid components...\n");
391 #endif
392 ac_list = rf_find_raid_components();
393
394 /* 2. sort them into their respective sets */
395
396 config_sets = rf_create_auto_sets(ac_list);
397
398 /* 3. evaluate each set and configure the valid ones
399 This gets done in rf_buildroothack() */
400
401 /* schedule the creation of the thread to do the
402 "/ on RAID" stuff */
403
404 kthread_create(rf_buildroothack,config_sets);
405
406 #if 0
407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
408 #endif
409 }
410
411 }
412
413 void
414 rf_buildroothack(arg)
415 void *arg;
416 {
417 RF_ConfigSet_t *config_sets = arg;
418 RF_ConfigSet_t *cset;
419 RF_ConfigSet_t *next_cset;
420 int retcode;
421 int raidID;
422 int rootID;
423 int num_root;
424
425 rootID = 0;
426 num_root = 0;
427 cset = config_sets;
428 while(cset != NULL ) {
429 next_cset = cset->next;
430 if (rf_have_enough_components(cset) &&
431 cset->ac->clabel->autoconfigure==1) {
432 retcode = rf_auto_config_set(cset,&raidID);
433 if (!retcode) {
434 if (cset->rootable) {
435 rootID = raidID;
436 num_root++;
437 }
438 } else {
439 /* The autoconfig didn't work :( */
440 #if DEBUG
441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
442 #endif
443 rf_release_all_vps(cset);
444 }
445 } else {
446 /* we're not autoconfiguring this set...
447 release the associated resources */
448 rf_release_all_vps(cset);
449 }
450 /* cleanup */
451 rf_cleanup_config_set(cset);
452 cset = next_cset;
453 }
454 if (boothowto & RB_ASKNAME) {
455 /* We don't auto-config... */
456 } else {
457 /* They didn't ask, and we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
630 /* Last one... device is not unconfigured yet.
631 Device shutdown has taken care of setting the
632 clean bits if RAIDF_INITED is not set
633 mark things as clean... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_update_component_labels(raidPtrs[unit],
638 RF_FINAL_COMPONENT_UPDATE);
639 if (doing_shutdown) {
640 /* last one, and we're going down, so
641 lights out for this RAID set too. */
642 error = rf_Shutdown(raidPtrs[unit]);
643 pool_destroy(&rs->sc_cbufpool);
644
645 /* It's no longer initialized... */
646 rs->sc_flags &= ~RAIDF_INITED;
647
648 /* Detach the disk. */
649 disk_detach(&rs->sc_dkdev);
650 }
651 }
652
653 raidunlock(rs);
654 return (0);
655
656 }
657
658 void
659 raidstrategy(bp)
660 struct buf *bp;
661 {
662 int s;
663
664 unsigned int raidID = raidunit(bp->b_dev);
665 RF_Raid_t *raidPtr;
666 struct raid_softc *rs = &raid_softc[raidID];
667 struct disklabel *lp;
668 int wlabel;
669
670 if ((rs->sc_flags & RAIDF_INITED) ==0) {
671 bp->b_error = ENXIO;
672 bp->b_flags |= B_ERROR;
673 bp->b_resid = bp->b_bcount;
674 biodone(bp);
675 return;
676 }
677 if (raidID >= numraid || !raidPtrs[raidID]) {
678 bp->b_error = ENODEV;
679 bp->b_flags |= B_ERROR;
680 bp->b_resid = bp->b_bcount;
681 biodone(bp);
682 return;
683 }
684 raidPtr = raidPtrs[raidID];
685 if (!raidPtr->valid) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 bp->b_resid = bp->b_bcount;
689 biodone(bp);
690 return;
691 }
692 if (bp->b_bcount == 0) {
693 db1_printf(("b_bcount is zero..\n"));
694 biodone(bp);
695 return;
696 }
697 lp = rs->sc_dkdev.dk_label;
698
699 /*
700 * Do bounds checking and adjust transfer. If there's an
701 * error, the bounds check will flag that for us.
702 */
703
704 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
705 if (DISKPART(bp->b_dev) != RAW_PART)
706 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
707 db1_printf(("Bounds check failed!!:%d %d\n",
708 (int) bp->b_blkno, (int) wlabel));
709 biodone(bp);
710 return;
711 }
712 s = splbio();
713
714 bp->b_resid = 0;
715
716 /* stuff it onto our queue */
717 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
718
719 raidstart(raidPtrs[raidID]);
720
721 splx(s);
722 }
723 /* ARGSUSED */
724 int
725 raidread(dev, uio, flags)
726 dev_t dev;
727 struct uio *uio;
728 int flags;
729 {
730 int unit = raidunit(dev);
731 struct raid_softc *rs;
732 int part;
733
734 if (unit >= numraid)
735 return (ENXIO);
736 rs = &raid_softc[unit];
737
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 part = DISKPART(dev);
741
742 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
743
744 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
745
746 }
747 /* ARGSUSED */
748 int
749 raidwrite(dev, uio, flags)
750 dev_t dev;
751 struct uio *uio;
752 int flags;
753 {
754 int unit = raidunit(dev);
755 struct raid_softc *rs;
756
757 if (unit >= numraid)
758 return (ENXIO);
759 rs = &raid_softc[unit];
760
761 if ((rs->sc_flags & RAIDF_INITED) == 0)
762 return (ENXIO);
763 db1_printf(("raidwrite\n"));
764 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
765
766 }
767
768 int
769 raidioctl(dev, cmd, data, flag, p)
770 dev_t dev;
771 u_long cmd;
772 caddr_t data;
773 int flag;
774 struct proc *p;
775 {
776 int unit = raidunit(dev);
777 int error = 0;
778 int part, pmask;
779 struct raid_softc *rs;
780 RF_Config_t *k_cfg, *u_cfg;
781 RF_Raid_t *raidPtr;
782 RF_RaidDisk_t *diskPtr;
783 RF_AccTotals_t *totals;
784 RF_DeviceConfig_t *d_cfg, **ucfgp;
785 u_char *specific_buf;
786 int retcode = 0;
787 int row;
788 int column;
789 struct rf_recon_req *rrcopy, *rr;
790 RF_ComponentLabel_t *clabel;
791 RF_ComponentLabel_t ci_label;
792 RF_ComponentLabel_t **clabel_ptr;
793 RF_SingleComponent_t *sparePtr,*componentPtr;
794 RF_SingleComponent_t hot_spare;
795 RF_SingleComponent_t component;
796 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
797 int i, j, d;
798 #ifdef __HAVE_OLD_DISKLABEL
799 struct disklabel newlabel;
800 #endif
801
802 if (unit >= numraid)
803 return (ENXIO);
804 rs = &raid_softc[unit];
805 raidPtr = raidPtrs[unit];
806
807 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
808 (int) DISKPART(dev), (int) unit, (int) cmd));
809
810 /* Must be open for writes for these commands... */
811 switch (cmd) {
812 case DIOCSDINFO:
813 case DIOCWDINFO:
814 #ifdef __HAVE_OLD_DISKLABEL
815 case ODIOCWDINFO:
816 case ODIOCSDINFO:
817 #endif
818 case DIOCWLABEL:
819 if ((flag & FWRITE) == 0)
820 return (EBADF);
821 }
822
823 /* Must be initialized for these... */
824 switch (cmd) {
825 case DIOCGDINFO:
826 case DIOCSDINFO:
827 case DIOCWDINFO:
828 #ifdef __HAVE_OLD_DISKLABEL
829 case ODIOCGDINFO:
830 case ODIOCWDINFO:
831 case ODIOCSDINFO:
832 case ODIOCGDEFLABEL:
833 #endif
834 case DIOCGPART:
835 case DIOCWLABEL:
836 case DIOCGDEFLABEL:
837 case RAIDFRAME_SHUTDOWN:
838 case RAIDFRAME_REWRITEPARITY:
839 case RAIDFRAME_GET_INFO:
840 case RAIDFRAME_RESET_ACCTOTALS:
841 case RAIDFRAME_GET_ACCTOTALS:
842 case RAIDFRAME_KEEP_ACCTOTALS:
843 case RAIDFRAME_GET_SIZE:
844 case RAIDFRAME_FAIL_DISK:
845 case RAIDFRAME_COPYBACK:
846 case RAIDFRAME_CHECK_RECON_STATUS:
847 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
848 case RAIDFRAME_GET_COMPONENT_LABEL:
849 case RAIDFRAME_SET_COMPONENT_LABEL:
850 case RAIDFRAME_ADD_HOT_SPARE:
851 case RAIDFRAME_REMOVE_HOT_SPARE:
852 case RAIDFRAME_INIT_LABELS:
853 case RAIDFRAME_REBUILD_IN_PLACE:
854 case RAIDFRAME_CHECK_PARITY:
855 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
857 case RAIDFRAME_CHECK_COPYBACK_STATUS:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
859 case RAIDFRAME_SET_AUTOCONFIG:
860 case RAIDFRAME_SET_ROOT:
861 case RAIDFRAME_DELETE_COMPONENT:
862 case RAIDFRAME_INCORPORATE_HOT_SPARE:
863 if ((rs->sc_flags & RAIDF_INITED) == 0)
864 return (ENXIO);
865 }
866
867 switch (cmd) {
868
869 /* configure the system */
870 case RAIDFRAME_CONFIGURE:
871
872 if (raidPtr->valid) {
873 /* There is a valid RAID set running on this unit! */
874 printf("raid%d: Device already configured!\n",unit);
875 return(EINVAL);
876 }
877
878 /* copy-in the configuration information */
879 /* data points to a pointer to the configuration structure */
880
881 u_cfg = *((RF_Config_t **) data);
882 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
883 if (k_cfg == NULL) {
884 return (ENOMEM);
885 }
886 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
887 sizeof(RF_Config_t));
888 if (retcode) {
889 RF_Free(k_cfg, sizeof(RF_Config_t));
890 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
891 retcode));
892 return (retcode);
893 }
894 /* allocate a buffer for the layout-specific data, and copy it
895 * in */
896 if (k_cfg->layoutSpecificSize) {
897 if (k_cfg->layoutSpecificSize > 10000) {
898 /* sanity check */
899 RF_Free(k_cfg, sizeof(RF_Config_t));
900 return (EINVAL);
901 }
902 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
903 (u_char *));
904 if (specific_buf == NULL) {
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (ENOMEM);
907 }
908 retcode = copyin(k_cfg->layoutSpecific,
909 (caddr_t) specific_buf,
910 k_cfg->layoutSpecificSize);
911 if (retcode) {
912 RF_Free(k_cfg, sizeof(RF_Config_t));
913 RF_Free(specific_buf,
914 k_cfg->layoutSpecificSize);
915 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
916 retcode));
917 return (retcode);
918 }
919 } else
920 specific_buf = NULL;
921 k_cfg->layoutSpecific = specific_buf;
922
923 /* should do some kind of sanity check on the configuration.
924 * Store the sum of all the bytes in the last byte? */
925
926 /* configure the system */
927
928 /*
929 * Clear the entire RAID descriptor, just to make sure
930 * there is no stale data left in the case of a
931 * reconfiguration
932 */
933 bzero((char *) raidPtr, sizeof(RF_Raid_t));
934 raidPtr->raidid = unit;
935
936 retcode = rf_Configure(raidPtr, k_cfg, NULL);
937
938 if (retcode == 0) {
939
940 /* allow this many simultaneous IO's to
941 this RAID device */
942 raidPtr->openings = RAIDOUTSTANDING;
943
944 raidinit(raidPtr);
945 rf_markalldirty(raidPtr);
946 }
947 /* free the buffers. No return code here. */
948 if (k_cfg->layoutSpecificSize) {
949 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
950 }
951 RF_Free(k_cfg, sizeof(RF_Config_t));
952
953 return (retcode);
954
955 /* shutdown the system */
956 case RAIDFRAME_SHUTDOWN:
957
958 if ((error = raidlock(rs)) != 0)
959 return (error);
960
961 /*
962 * If somebody has a partition mounted, we shouldn't
963 * shutdown.
964 */
965
966 part = DISKPART(dev);
967 pmask = (1 << part);
968 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
969 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
970 (rs->sc_dkdev.dk_copenmask & pmask))) {
971 raidunlock(rs);
972 return (EBUSY);
973 }
974
975 retcode = rf_Shutdown(raidPtr);
976
977 pool_destroy(&rs->sc_cbufpool);
978
979 /* It's no longer initialized... */
980 rs->sc_flags &= ~RAIDF_INITED;
981
982 /* Detach the disk. */
983 disk_detach(&rs->sc_dkdev);
984
985 raidunlock(rs);
986
987 return (retcode);
988 case RAIDFRAME_GET_COMPONENT_LABEL:
989 clabel_ptr = (RF_ComponentLabel_t **) data;
990 /* need to read the component label for the disk indicated
991 by row,column in clabel */
992
993 /* For practice, let's get it directly fromdisk, rather
994 than from the in-core copy */
995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
996 (RF_ComponentLabel_t *));
997 if (clabel == NULL)
998 return (ENOMEM);
999
1000 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 row = clabel->row;
1011 column = clabel->column;
1012
1013 if ((row < 0) || (row >= raidPtr->numRow) ||
1014 (column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 raidread_component_label(raidPtr->Disks[row][column].dev,
1021 raidPtr->raid_cinfo[row][column].ci_vp,
1022 clabel );
1023
1024 retcode = copyout((caddr_t) clabel,
1025 (caddr_t) *clabel_ptr,
1026 sizeof(RF_ComponentLabel_t));
1027 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1028 return (retcode);
1029
1030 case RAIDFRAME_SET_COMPONENT_LABEL:
1031 clabel = (RF_ComponentLabel_t *) data;
1032
1033 /* XXX check the label for valid stuff... */
1034 /* Note that some things *should not* get modified --
1035 the user should be re-initing the labels instead of
1036 trying to patch things.
1037 */
1038
1039 printf("Got component label:\n");
1040 printf("Version: %d\n",clabel->version);
1041 printf("Serial Number: %d\n",clabel->serial_number);
1042 printf("Mod counter: %d\n",clabel->mod_counter);
1043 printf("Row: %d\n", clabel->row);
1044 printf("Column: %d\n", clabel->column);
1045 printf("Num Rows: %d\n", clabel->num_rows);
1046 printf("Num Columns: %d\n", clabel->num_columns);
1047 printf("Clean: %d\n", clabel->clean);
1048 printf("Status: %d\n", clabel->status);
1049
1050 row = clabel->row;
1051 column = clabel->column;
1052
1053 if ((row < 0) || (row >= raidPtr->numRow) ||
1054 (column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[row][column].dev,
1065 raidPtr->raid_cinfo[row][column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 raid_init_component_label(raidPtr, &ci_label);
1082 ci_label.serial_number = clabel->serial_number;
1083
1084 for(row=0;row<raidPtr->numRow;row++) {
1085 ci_label.row = row;
1086 for(column=0;column<raidPtr->numCol;column++) {
1087 diskPtr = &raidPtr->Disks[row][column];
1088 if (!RF_DEAD_DISK(diskPtr->status)) {
1089 ci_label.partitionSize = diskPtr->partitionSize;
1090 ci_label.column = column;
1091 raidwrite_component_label(
1092 raidPtr->Disks[row][column].dev,
1093 raidPtr->raid_cinfo[row][column].ci_vp,
1094 &ci_label );
1095 }
1096 }
1097 }
1098
1099 return (retcode);
1100 case RAIDFRAME_SET_AUTOCONFIG:
1101 d = rf_set_autoconfig(raidPtr, *(int *) data);
1102 printf("New autoconfig value is: %d\n", d);
1103 *(int *) data = d;
1104 return (retcode);
1105
1106 case RAIDFRAME_SET_ROOT:
1107 d = rf_set_rootpartition(raidPtr, *(int *) data);
1108 printf("New rootpartition value is: %d\n", d);
1109 *(int *) data = d;
1110 return (retcode);
1111
1112 /* initialize all parity */
1113 case RAIDFRAME_REWRITEPARITY:
1114
1115 if (raidPtr->Layout.map->faultsTolerated == 0) {
1116 /* Parity for RAID 0 is trivially correct */
1117 raidPtr->parity_good = RF_RAID_CLEAN;
1118 return(0);
1119 }
1120
1121 if (raidPtr->parity_rewrite_in_progress == 1) {
1122 /* Re-write is already in progress! */
1123 return(EINVAL);
1124 }
1125
1126 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1127 rf_RewriteParityThread,
1128 raidPtr,"raid_parity");
1129 return (retcode);
1130
1131
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 sparePtr = (RF_SingleComponent_t *) data;
1134 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1135 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1136 return(retcode);
1137
1138 case RAIDFRAME_REMOVE_HOT_SPARE:
1139 return(retcode);
1140
1141 case RAIDFRAME_DELETE_COMPONENT:
1142 componentPtr = (RF_SingleComponent_t *)data;
1143 memcpy( &component, componentPtr,
1144 sizeof(RF_SingleComponent_t));
1145 retcode = rf_delete_component(raidPtr, &component);
1146 return(retcode);
1147
1148 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1149 componentPtr = (RF_SingleComponent_t *)data;
1150 memcpy( &component, componentPtr,
1151 sizeof(RF_SingleComponent_t));
1152 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1153 return(retcode);
1154
1155 case RAIDFRAME_REBUILD_IN_PLACE:
1156
1157 if (raidPtr->Layout.map->faultsTolerated == 0) {
1158 /* Can't do this on a RAID 0!! */
1159 return(EINVAL);
1160 }
1161
1162 if (raidPtr->recon_in_progress == 1) {
1163 /* a reconstruct is already in progress! */
1164 return(EINVAL);
1165 }
1166
1167 componentPtr = (RF_SingleComponent_t *) data;
1168 memcpy( &component, componentPtr,
1169 sizeof(RF_SingleComponent_t));
1170 row = component.row;
1171 column = component.column;
1172 printf("Rebuild: %d %d\n",row, column);
1173 if ((row < 0) || (row >= raidPtr->numRow) ||
1174 (column < 0) || (column >= raidPtr->numCol)) {
1175 return(EINVAL);
1176 }
1177
1178 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1179 if (rrcopy == NULL)
1180 return(ENOMEM);
1181
1182 rrcopy->raidPtr = (void *) raidPtr;
1183 rrcopy->row = row;
1184 rrcopy->col = column;
1185
1186 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1187 rf_ReconstructInPlaceThread,
1188 rrcopy,"raid_reconip");
1189 return(retcode);
1190
1191 case RAIDFRAME_GET_INFO:
1192 if (!raidPtr->valid)
1193 return (ENODEV);
1194 ucfgp = (RF_DeviceConfig_t **) data;
1195 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1196 (RF_DeviceConfig_t *));
1197 if (d_cfg == NULL)
1198 return (ENOMEM);
1199 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1200 d_cfg->rows = raidPtr->numRow;
1201 d_cfg->cols = raidPtr->numCol;
1202 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1203 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1204 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1205 return (ENOMEM);
1206 }
1207 d_cfg->nspares = raidPtr->numSpare;
1208 if (d_cfg->nspares >= RF_MAX_DISKS) {
1209 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1210 return (ENOMEM);
1211 }
1212 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1213 d = 0;
1214 for (i = 0; i < d_cfg->rows; i++) {
1215 for (j = 0; j < d_cfg->cols; j++) {
1216 d_cfg->devs[d] = raidPtr->Disks[i][j];
1217 d++;
1218 }
1219 }
1220 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1221 d_cfg->spares[i] = raidPtr->Disks[0][j];
1222 }
1223 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1224 sizeof(RF_DeviceConfig_t));
1225 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1226
1227 return (retcode);
1228
1229 case RAIDFRAME_CHECK_PARITY:
1230 *(int *) data = raidPtr->parity_good;
1231 return (0);
1232
1233 case RAIDFRAME_RESET_ACCTOTALS:
1234 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1235 return (0);
1236
1237 case RAIDFRAME_GET_ACCTOTALS:
1238 totals = (RF_AccTotals_t *) data;
1239 *totals = raidPtr->acc_totals;
1240 return (0);
1241
1242 case RAIDFRAME_KEEP_ACCTOTALS:
1243 raidPtr->keep_acc_totals = *(int *)data;
1244 return (0);
1245
1246 case RAIDFRAME_GET_SIZE:
1247 *(int *) data = raidPtr->totalSectors;
1248 return (0);
1249
1250 /* fail a disk & optionally start reconstruction */
1251 case RAIDFRAME_FAIL_DISK:
1252
1253 if (raidPtr->Layout.map->faultsTolerated == 0) {
1254 /* Can't do this on a RAID 0!! */
1255 return(EINVAL);
1256 }
1257
1258 rr = (struct rf_recon_req *) data;
1259
1260 if (rr->row < 0 || rr->row >= raidPtr->numRow
1261 || rr->col < 0 || rr->col >= raidPtr->numCol)
1262 return (EINVAL);
1263
1264 printf("raid%d: Failing the disk: row: %d col: %d\n",
1265 unit, rr->row, rr->col);
1266
1267 /* make a copy of the recon request so that we don't rely on
1268 * the user's buffer */
1269 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1270 if (rrcopy == NULL)
1271 return(ENOMEM);
1272 bcopy(rr, rrcopy, sizeof(*rr));
1273 rrcopy->raidPtr = (void *) raidPtr;
1274
1275 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1276 rf_ReconThread,
1277 rrcopy,"raid_recon");
1278 return (0);
1279
1280 /* invoke a copyback operation after recon on whatever disk
1281 * needs it, if any */
1282 case RAIDFRAME_COPYBACK:
1283
1284 if (raidPtr->Layout.map->faultsTolerated == 0) {
1285 /* This makes no sense on a RAID 0!! */
1286 return(EINVAL);
1287 }
1288
1289 if (raidPtr->copyback_in_progress == 1) {
1290 /* Copyback is already in progress! */
1291 return(EINVAL);
1292 }
1293
1294 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1295 rf_CopybackThread,
1296 raidPtr,"raid_copyback");
1297 return (retcode);
1298
1299 /* return the percentage completion of reconstruction */
1300 case RAIDFRAME_CHECK_RECON_STATUS:
1301 if (raidPtr->Layout.map->faultsTolerated == 0) {
1302 /* This makes no sense on a RAID 0, so tell the
1303 user it's done. */
1304 *(int *) data = 100;
1305 return(0);
1306 }
1307 row = 0; /* XXX we only consider a single row... */
1308 if (raidPtr->status[row] != rf_rs_reconstructing)
1309 *(int *) data = 100;
1310 else
1311 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1312 return (0);
1313 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1314 progressInfoPtr = (RF_ProgressInfo_t **) data;
1315 row = 0; /* XXX we only consider a single row... */
1316 if (raidPtr->status[row] != rf_rs_reconstructing) {
1317 progressInfo.remaining = 0;
1318 progressInfo.completed = 100;
1319 progressInfo.total = 100;
1320 } else {
1321 progressInfo.total =
1322 raidPtr->reconControl[row]->numRUsTotal;
1323 progressInfo.completed =
1324 raidPtr->reconControl[row]->numRUsComplete;
1325 progressInfo.remaining = progressInfo.total -
1326 progressInfo.completed;
1327 }
1328 retcode = copyout((caddr_t) &progressInfo,
1329 (caddr_t) *progressInfoPtr,
1330 sizeof(RF_ProgressInfo_t));
1331 return (retcode);
1332
1333 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1334 if (raidPtr->Layout.map->faultsTolerated == 0) {
1335 /* This makes no sense on a RAID 0, so tell the
1336 user it's done. */
1337 *(int *) data = 100;
1338 return(0);
1339 }
1340 if (raidPtr->parity_rewrite_in_progress == 1) {
1341 *(int *) data = 100 *
1342 raidPtr->parity_rewrite_stripes_done /
1343 raidPtr->Layout.numStripe;
1344 } else {
1345 *(int *) data = 100;
1346 }
1347 return (0);
1348
1349 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1350 progressInfoPtr = (RF_ProgressInfo_t **) data;
1351 if (raidPtr->parity_rewrite_in_progress == 1) {
1352 progressInfo.total = raidPtr->Layout.numStripe;
1353 progressInfo.completed =
1354 raidPtr->parity_rewrite_stripes_done;
1355 progressInfo.remaining = progressInfo.total -
1356 progressInfo.completed;
1357 } else {
1358 progressInfo.remaining = 0;
1359 progressInfo.completed = 100;
1360 progressInfo.total = 100;
1361 }
1362 retcode = copyout((caddr_t) &progressInfo,
1363 (caddr_t) *progressInfoPtr,
1364 sizeof(RF_ProgressInfo_t));
1365 return (retcode);
1366
1367 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1368 if (raidPtr->Layout.map->faultsTolerated == 0) {
1369 /* This makes no sense on a RAID 0 */
1370 *(int *) data = 100;
1371 return(0);
1372 }
1373 if (raidPtr->copyback_in_progress == 1) {
1374 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1375 raidPtr->Layout.numStripe;
1376 } else {
1377 *(int *) data = 100;
1378 }
1379 return (0);
1380
1381 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1382 progressInfoPtr = (RF_ProgressInfo_t **) data;
1383 if (raidPtr->copyback_in_progress == 1) {
1384 progressInfo.total = raidPtr->Layout.numStripe;
1385 progressInfo.completed =
1386 raidPtr->copyback_stripes_done;
1387 progressInfo.remaining = progressInfo.total -
1388 progressInfo.completed;
1389 } else {
1390 progressInfo.remaining = 0;
1391 progressInfo.completed = 100;
1392 progressInfo.total = 100;
1393 }
1394 retcode = copyout((caddr_t) &progressInfo,
1395 (caddr_t) *progressInfoPtr,
1396 sizeof(RF_ProgressInfo_t));
1397 return (retcode);
1398
1399 /* the sparetable daemon calls this to wait for the kernel to
1400 * need a spare table. this ioctl does not return until a
1401 * spare table is needed. XXX -- calling mpsleep here in the
1402 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1403 * -- I should either compute the spare table in the kernel,
1404 * or have a different -- XXX XXX -- interface (a different
1405 * character device) for delivering the table -- XXX */
1406 #if 0
1407 case RAIDFRAME_SPARET_WAIT:
1408 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1409 while (!rf_sparet_wait_queue)
1410 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1411 waitreq = rf_sparet_wait_queue;
1412 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1413 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1414
1415 /* structure assignment */
1416 *((RF_SparetWait_t *) data) = *waitreq;
1417
1418 RF_Free(waitreq, sizeof(*waitreq));
1419 return (0);
1420
1421 /* wakes up a process waiting on SPARET_WAIT and puts an error
1422 * code in it that will cause the dameon to exit */
1423 case RAIDFRAME_ABORT_SPARET_WAIT:
1424 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1425 waitreq->fcol = -1;
1426 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1427 waitreq->next = rf_sparet_wait_queue;
1428 rf_sparet_wait_queue = waitreq;
1429 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1430 wakeup(&rf_sparet_wait_queue);
1431 return (0);
1432
1433 /* used by the spare table daemon to deliver a spare table
1434 * into the kernel */
1435 case RAIDFRAME_SEND_SPARET:
1436
1437 /* install the spare table */
1438 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1439
1440 /* respond to the requestor. the return status of the spare
1441 * table installation is passed in the "fcol" field */
1442 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1443 waitreq->fcol = retcode;
1444 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1445 waitreq->next = rf_sparet_resp_queue;
1446 rf_sparet_resp_queue = waitreq;
1447 wakeup(&rf_sparet_resp_queue);
1448 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1449
1450 return (retcode);
1451 #endif
1452
1453 default:
1454 break; /* fall through to the os-specific code below */
1455
1456 }
1457
1458 if (!raidPtr->valid)
1459 return (EINVAL);
1460
1461 /*
1462 * Add support for "regular" device ioctls here.
1463 */
1464
1465 switch (cmd) {
1466 case DIOCGDINFO:
1467 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1468 break;
1469 #ifdef __HAVE_OLD_DISKLABEL
1470 case ODIOCGDINFO:
1471 newlabel = *(rs->sc_dkdev.dk_label);
1472 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1473 return ENOTTY;
1474 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1475 break;
1476 #endif
1477
1478 case DIOCGPART:
1479 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1480 ((struct partinfo *) data)->part =
1481 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1482 break;
1483
1484 case DIOCWDINFO:
1485 case DIOCSDINFO:
1486 #ifdef __HAVE_OLD_DISKLABEL
1487 case ODIOCWDINFO:
1488 case ODIOCSDINFO:
1489 #endif
1490 {
1491 struct disklabel *lp;
1492 #ifdef __HAVE_OLD_DISKLABEL
1493 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1494 memset(&newlabel, 0, sizeof newlabel);
1495 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1496 lp = &newlabel;
1497 } else
1498 #endif
1499 lp = (struct disklabel *)data;
1500
1501 if ((error = raidlock(rs)) != 0)
1502 return (error);
1503
1504 rs->sc_flags |= RAIDF_LABELLING;
1505
1506 error = setdisklabel(rs->sc_dkdev.dk_label,
1507 lp, 0, rs->sc_dkdev.dk_cpulabel);
1508 if (error == 0) {
1509 if (cmd == DIOCWDINFO
1510 #ifdef __HAVE_OLD_DISKLABEL
1511 || cmd == ODIOCWDINFO
1512 #endif
1513 )
1514 error = writedisklabel(RAIDLABELDEV(dev),
1515 raidstrategy, rs->sc_dkdev.dk_label,
1516 rs->sc_dkdev.dk_cpulabel);
1517 }
1518 rs->sc_flags &= ~RAIDF_LABELLING;
1519
1520 raidunlock(rs);
1521
1522 if (error)
1523 return (error);
1524 break;
1525 }
1526
1527 case DIOCWLABEL:
1528 if (*(int *) data != 0)
1529 rs->sc_flags |= RAIDF_WLABEL;
1530 else
1531 rs->sc_flags &= ~RAIDF_WLABEL;
1532 break;
1533
1534 case DIOCGDEFLABEL:
1535 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1536 break;
1537
1538 #ifdef __HAVE_OLD_DISKLABEL
1539 case ODIOCGDEFLABEL:
1540 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1541 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1542 return ENOTTY;
1543 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1544 break;
1545 #endif
1546
1547 default:
1548 retcode = ENOTTY;
1549 }
1550 return (retcode);
1551
1552 }
1553
1554
1555 /* raidinit -- complete the rest of the initialization for the
1556 RAIDframe device. */
1557
1558
1559 static void
1560 raidinit(raidPtr)
1561 RF_Raid_t *raidPtr;
1562 {
1563 struct raid_softc *rs;
1564 int unit;
1565
1566 unit = raidPtr->raidid;
1567
1568 rs = &raid_softc[unit];
1569 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1570 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1571
1572
1573 /* XXX should check return code first... */
1574 rs->sc_flags |= RAIDF_INITED;
1575
1576 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1577
1578 rs->sc_dkdev.dk_name = rs->sc_xname;
1579
1580 /* disk_attach actually creates space for the CPU disklabel, among
1581 * other things, so it's critical to call this *BEFORE* we try putzing
1582 * with disklabels. */
1583
1584 disk_attach(&rs->sc_dkdev);
1585
1586 /* XXX There may be a weird interaction here between this, and
1587 * protectedSectors, as used in RAIDframe. */
1588
1589 rs->sc_size = raidPtr->totalSectors;
1590
1591 }
1592
1593 /* wake up the daemon & tell it to get us a spare table
1594 * XXX
1595 * the entries in the queues should be tagged with the raidPtr
1596 * so that in the extremely rare case that two recons happen at once,
1597 * we know for which device were requesting a spare table
1598 * XXX
1599 *
1600 * XXX This code is not currently used. GO
1601 */
1602 int
1603 rf_GetSpareTableFromDaemon(req)
1604 RF_SparetWait_t *req;
1605 {
1606 int retcode;
1607
1608 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1609 req->next = rf_sparet_wait_queue;
1610 rf_sparet_wait_queue = req;
1611 wakeup(&rf_sparet_wait_queue);
1612
1613 /* mpsleep unlocks the mutex */
1614 while (!rf_sparet_resp_queue) {
1615 tsleep(&rf_sparet_resp_queue, PRIBIO,
1616 "raidframe getsparetable", 0);
1617 }
1618 req = rf_sparet_resp_queue;
1619 rf_sparet_resp_queue = req->next;
1620 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1621
1622 retcode = req->fcol;
1623 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1624 * alloc'd */
1625 return (retcode);
1626 }
1627
1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1629 * bp & passes it down.
1630 * any calls originating in the kernel must use non-blocking I/O
1631 * do some extra sanity checking to return "appropriate" error values for
1632 * certain conditions (to make some standard utilities work)
1633 *
1634 * Formerly known as: rf_DoAccessKernel
1635 */
1636 void
1637 raidstart(raidPtr)
1638 RF_Raid_t *raidPtr;
1639 {
1640 RF_SectorCount_t num_blocks, pb, sum;
1641 RF_RaidAddr_t raid_addr;
1642 int retcode;
1643 struct partition *pp;
1644 daddr_t blocknum;
1645 int unit;
1646 struct raid_softc *rs;
1647 int do_async;
1648 struct buf *bp;
1649
1650 unit = raidPtr->raidid;
1651 rs = &raid_softc[unit];
1652
1653 /* quick check to see if anything has died recently */
1654 RF_LOCK_MUTEX(raidPtr->mutex);
1655 if (raidPtr->numNewFailures > 0) {
1656 rf_update_component_labels(raidPtr,
1657 RF_NORMAL_COMPONENT_UPDATE);
1658 raidPtr->numNewFailures--;
1659 }
1660 RF_UNLOCK_MUTEX(raidPtr->mutex);
1661
1662 /* Check to see if we're at the limit... */
1663 RF_LOCK_MUTEX(raidPtr->mutex);
1664 while (raidPtr->openings > 0) {
1665 RF_UNLOCK_MUTEX(raidPtr->mutex);
1666
1667 /* get the next item, if any, from the queue */
1668 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1669 /* nothing more to do */
1670 return;
1671 }
1672 BUFQ_REMOVE(&rs->buf_queue, bp);
1673
1674 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1675 * partition.. Need to make it absolute to the underlying
1676 * device.. */
1677
1678 blocknum = bp->b_blkno;
1679 if (DISKPART(bp->b_dev) != RAW_PART) {
1680 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1681 blocknum += pp->p_offset;
1682 }
1683
1684 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1685 (int) blocknum));
1686
1687 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1688 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1689
1690 /* *THIS* is where we adjust what block we're going to...
1691 * but DO NOT TOUCH bp->b_blkno!!! */
1692 raid_addr = blocknum;
1693
1694 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1695 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1696 sum = raid_addr + num_blocks + pb;
1697 if (1 || rf_debugKernelAccess) {
1698 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1699 (int) raid_addr, (int) sum, (int) num_blocks,
1700 (int) pb, (int) bp->b_resid));
1701 }
1702 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1703 || (sum < num_blocks) || (sum < pb)) {
1704 bp->b_error = ENOSPC;
1705 bp->b_flags |= B_ERROR;
1706 bp->b_resid = bp->b_bcount;
1707 biodone(bp);
1708 RF_LOCK_MUTEX(raidPtr->mutex);
1709 continue;
1710 }
1711 /*
1712 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1713 */
1714
1715 if (bp->b_bcount & raidPtr->sectorMask) {
1716 bp->b_error = EINVAL;
1717 bp->b_flags |= B_ERROR;
1718 bp->b_resid = bp->b_bcount;
1719 biodone(bp);
1720 RF_LOCK_MUTEX(raidPtr->mutex);
1721 continue;
1722
1723 }
1724 db1_printf(("Calling DoAccess..\n"));
1725
1726
1727 RF_LOCK_MUTEX(raidPtr->mutex);
1728 raidPtr->openings--;
1729 RF_UNLOCK_MUTEX(raidPtr->mutex);
1730
1731 /*
1732 * Everything is async.
1733 */
1734 do_async = 1;
1735
1736 disk_busy(&rs->sc_dkdev);
1737
1738 /* XXX we're still at splbio() here... do we *really*
1739 need to be? */
1740
1741 /* don't ever condition on bp->b_flags & B_WRITE.
1742 * always condition on B_READ instead */
1743
1744 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1745 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1746 do_async, raid_addr, num_blocks,
1747 bp->b_data, bp, NULL, NULL,
1748 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1749
1750
1751 RF_LOCK_MUTEX(raidPtr->mutex);
1752 }
1753 RF_UNLOCK_MUTEX(raidPtr->mutex);
1754 }
1755
1756
1757
1758
1759 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1760
1761 int
1762 rf_DispatchKernelIO(queue, req)
1763 RF_DiskQueue_t *queue;
1764 RF_DiskQueueData_t *req;
1765 {
1766 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1767 struct buf *bp;
1768 struct raidbuf *raidbp = NULL;
1769 struct raid_softc *rs;
1770 int unit;
1771 int s;
1772
1773 s=0;
1774 /* s = splbio();*/ /* want to test this */
1775 /* XXX along with the vnode, we also need the softc associated with
1776 * this device.. */
1777
1778 req->queue = queue;
1779
1780 unit = queue->raidPtr->raidid;
1781
1782 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1783
1784 if (unit >= numraid) {
1785 printf("Invalid unit number: %d %d\n", unit, numraid);
1786 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1787 }
1788 rs = &raid_softc[unit];
1789
1790 bp = req->bp;
1791 #if 1
1792 /* XXX when there is a physical disk failure, someone is passing us a
1793 * buffer that contains old stuff!! Attempt to deal with this problem
1794 * without taking a performance hit... (not sure where the real bug
1795 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1796
1797 if (bp->b_flags & B_ERROR) {
1798 bp->b_flags &= ~B_ERROR;
1799 }
1800 if (bp->b_error != 0) {
1801 bp->b_error = 0;
1802 }
1803 #endif
1804 raidbp = RAIDGETBUF(rs);
1805
1806 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1807
1808 /*
1809 * context for raidiodone
1810 */
1811 raidbp->rf_obp = bp;
1812 raidbp->req = req;
1813
1814 LIST_INIT(&raidbp->rf_buf.b_dep);
1815
1816 switch (req->type) {
1817 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1818 /* XXX need to do something extra here.. */
1819 /* I'm leaving this in, as I've never actually seen it used,
1820 * and I'd like folks to report it... GO */
1821 printf(("WAKEUP CALLED\n"));
1822 queue->numOutstanding++;
1823
1824 /* XXX need to glue the original buffer into this?? */
1825
1826 KernelWakeupFunc(&raidbp->rf_buf);
1827 break;
1828
1829 case RF_IO_TYPE_READ:
1830 case RF_IO_TYPE_WRITE:
1831
1832 if (req->tracerec) {
1833 RF_ETIMER_START(req->tracerec->timer);
1834 }
1835 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1836 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1837 req->sectorOffset, req->numSector,
1838 req->buf, KernelWakeupFunc, (void *) req,
1839 queue->raidPtr->logBytesPerSector, req->b_proc);
1840
1841 if (rf_debugKernelAccess) {
1842 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1843 (long) bp->b_blkno));
1844 }
1845 queue->numOutstanding++;
1846 queue->last_deq_sector = req->sectorOffset;
1847 /* acc wouldn't have been let in if there were any pending
1848 * reqs at any other priority */
1849 queue->curPriority = req->priority;
1850
1851 db1_printf(("Going for %c to unit %d row %d col %d\n",
1852 req->type, unit, queue->row, queue->col));
1853 db1_printf(("sector %d count %d (%d bytes) %d\n",
1854 (int) req->sectorOffset, (int) req->numSector,
1855 (int) (req->numSector <<
1856 queue->raidPtr->logBytesPerSector),
1857 (int) queue->raidPtr->logBytesPerSector));
1858 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1859 raidbp->rf_buf.b_vp->v_numoutput++;
1860 }
1861 VOP_STRATEGY(&raidbp->rf_buf);
1862
1863 break;
1864
1865 default:
1866 panic("bad req->type in rf_DispatchKernelIO");
1867 }
1868 db1_printf(("Exiting from DispatchKernelIO\n"));
1869 /* splx(s); */ /* want to test this */
1870 return (0);
1871 }
1872 /* this is the callback function associated with a I/O invoked from
1873 kernel code.
1874 */
1875 static void
1876 KernelWakeupFunc(vbp)
1877 struct buf *vbp;
1878 {
1879 RF_DiskQueueData_t *req = NULL;
1880 RF_DiskQueue_t *queue;
1881 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1882 struct buf *bp;
1883 struct raid_softc *rs;
1884 int unit;
1885 int s;
1886
1887 s = splbio();
1888 db1_printf(("recovering the request queue:\n"));
1889 req = raidbp->req;
1890
1891 bp = raidbp->rf_obp;
1892
1893 queue = (RF_DiskQueue_t *) req->queue;
1894
1895 if (raidbp->rf_buf.b_flags & B_ERROR) {
1896 bp->b_flags |= B_ERROR;
1897 bp->b_error = raidbp->rf_buf.b_error ?
1898 raidbp->rf_buf.b_error : EIO;
1899 }
1900
1901 /* XXX methinks this could be wrong... */
1902 #if 1
1903 bp->b_resid = raidbp->rf_buf.b_resid;
1904 #endif
1905
1906 if (req->tracerec) {
1907 RF_ETIMER_STOP(req->tracerec->timer);
1908 RF_ETIMER_EVAL(req->tracerec->timer);
1909 RF_LOCK_MUTEX(rf_tracing_mutex);
1910 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1911 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1912 req->tracerec->num_phys_ios++;
1913 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1914 }
1915 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1916
1917 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1918
1919
1920 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1921 * ballistic, and mark the component as hosed... */
1922
1923 if (bp->b_flags & B_ERROR) {
1924 /* Mark the disk as dead */
1925 /* but only mark it once... */
1926 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1927 rf_ds_optimal) {
1928 printf("raid%d: IO Error. Marking %s as failed.\n",
1929 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1930 queue->raidPtr->Disks[queue->row][queue->col].status =
1931 rf_ds_failed;
1932 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1933 queue->raidPtr->numFailures++;
1934 queue->raidPtr->numNewFailures++;
1935 } else { /* Disk is already dead... */
1936 /* printf("Disk already marked as dead!\n"); */
1937 }
1938
1939 }
1940
1941 rs = &raid_softc[unit];
1942 RAIDPUTBUF(rs, raidbp);
1943
1944 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1945 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1946
1947 splx(s);
1948 }
1949
1950
1951
1952 /*
1953 * initialize a buf structure for doing an I/O in the kernel.
1954 */
1955 static void
1956 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1957 logBytesPerSector, b_proc)
1958 struct buf *bp;
1959 struct vnode *b_vp;
1960 unsigned rw_flag;
1961 dev_t dev;
1962 RF_SectorNum_t startSect;
1963 RF_SectorCount_t numSect;
1964 caddr_t buf;
1965 void (*cbFunc) (struct buf *);
1966 void *cbArg;
1967 int logBytesPerSector;
1968 struct proc *b_proc;
1969 {
1970 /* bp->b_flags = B_PHYS | rw_flag; */
1971 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1972 bp->b_bcount = numSect << logBytesPerSector;
1973 bp->b_bufsize = bp->b_bcount;
1974 bp->b_error = 0;
1975 bp->b_dev = dev;
1976 bp->b_data = buf;
1977 bp->b_blkno = startSect;
1978 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1979 if (bp->b_bcount == 0) {
1980 panic("bp->b_bcount is zero in InitBP!!\n");
1981 }
1982 bp->b_proc = b_proc;
1983 bp->b_iodone = cbFunc;
1984 bp->b_vp = b_vp;
1985
1986 }
1987
1988 static void
1989 raidgetdefaultlabel(raidPtr, rs, lp)
1990 RF_Raid_t *raidPtr;
1991 struct raid_softc *rs;
1992 struct disklabel *lp;
1993 {
1994 db1_printf(("Building a default label...\n"));
1995 bzero(lp, sizeof(*lp));
1996
1997 /* fabricate a label... */
1998 lp->d_secperunit = raidPtr->totalSectors;
1999 lp->d_secsize = raidPtr->bytesPerSector;
2000 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2001 lp->d_ntracks = 4 * raidPtr->numCol;
2002 lp->d_ncylinders = raidPtr->totalSectors /
2003 (lp->d_nsectors * lp->d_ntracks);
2004 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2005
2006 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2007 lp->d_type = DTYPE_RAID;
2008 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2009 lp->d_rpm = 3600;
2010 lp->d_interleave = 1;
2011 lp->d_flags = 0;
2012
2013 lp->d_partitions[RAW_PART].p_offset = 0;
2014 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2015 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2016 lp->d_npartitions = RAW_PART + 1;
2017
2018 lp->d_magic = DISKMAGIC;
2019 lp->d_magic2 = DISKMAGIC;
2020 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2021
2022 }
2023 /*
2024 * Read the disklabel from the raid device. If one is not present, fake one
2025 * up.
2026 */
2027 static void
2028 raidgetdisklabel(dev)
2029 dev_t dev;
2030 {
2031 int unit = raidunit(dev);
2032 struct raid_softc *rs = &raid_softc[unit];
2033 char *errstring;
2034 struct disklabel *lp = rs->sc_dkdev.dk_label;
2035 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2036 RF_Raid_t *raidPtr;
2037
2038 db1_printf(("Getting the disklabel...\n"));
2039
2040 bzero(clp, sizeof(*clp));
2041
2042 raidPtr = raidPtrs[unit];
2043
2044 raidgetdefaultlabel(raidPtr, rs, lp);
2045
2046 /*
2047 * Call the generic disklabel extraction routine.
2048 */
2049 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2050 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2051 if (errstring)
2052 raidmakedisklabel(rs);
2053 else {
2054 int i;
2055 struct partition *pp;
2056
2057 /*
2058 * Sanity check whether the found disklabel is valid.
2059 *
2060 * This is necessary since total size of the raid device
2061 * may vary when an interleave is changed even though exactly
2062 * same componets are used, and old disklabel may used
2063 * if that is found.
2064 */
2065 if (lp->d_secperunit != rs->sc_size)
2066 printf("WARNING: %s: "
2067 "total sector size in disklabel (%d) != "
2068 "the size of raid (%ld)\n", rs->sc_xname,
2069 lp->d_secperunit, (long) rs->sc_size);
2070 for (i = 0; i < lp->d_npartitions; i++) {
2071 pp = &lp->d_partitions[i];
2072 if (pp->p_offset + pp->p_size > rs->sc_size)
2073 printf("WARNING: %s: end of partition `%c' "
2074 "exceeds the size of raid (%ld)\n",
2075 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2076 }
2077 }
2078
2079 }
2080 /*
2081 * Take care of things one might want to take care of in the event
2082 * that a disklabel isn't present.
2083 */
2084 static void
2085 raidmakedisklabel(rs)
2086 struct raid_softc *rs;
2087 {
2088 struct disklabel *lp = rs->sc_dkdev.dk_label;
2089 db1_printf(("Making a label..\n"));
2090
2091 /*
2092 * For historical reasons, if there's no disklabel present
2093 * the raw partition must be marked FS_BSDFFS.
2094 */
2095
2096 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2097
2098 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2099
2100 lp->d_checksum = dkcksum(lp);
2101 }
2102 /*
2103 * Lookup the provided name in the filesystem. If the file exists,
2104 * is a valid block device, and isn't being used by anyone else,
2105 * set *vpp to the file's vnode.
2106 * You'll find the original of this in ccd.c
2107 */
2108 int
2109 raidlookup(path, p, vpp)
2110 char *path;
2111 struct proc *p;
2112 struct vnode **vpp; /* result */
2113 {
2114 struct nameidata nd;
2115 struct vnode *vp;
2116 struct vattr va;
2117 int error;
2118
2119 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2120 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2121 #ifdef DEBUG
2122 printf("RAIDframe: vn_open returned %d\n", error);
2123 #endif
2124 return (error);
2125 }
2126 vp = nd.ni_vp;
2127 if (vp->v_usecount > 1) {
2128 VOP_UNLOCK(vp, 0);
2129 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2130 return (EBUSY);
2131 }
2132 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2133 VOP_UNLOCK(vp, 0);
2134 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2135 return (error);
2136 }
2137 /* XXX: eventually we should handle VREG, too. */
2138 if (va.va_type != VBLK) {
2139 VOP_UNLOCK(vp, 0);
2140 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2141 return (ENOTBLK);
2142 }
2143 VOP_UNLOCK(vp, 0);
2144 *vpp = vp;
2145 return (0);
2146 }
2147 /*
2148 * Wait interruptibly for an exclusive lock.
2149 *
2150 * XXX
2151 * Several drivers do this; it should be abstracted and made MP-safe.
2152 * (Hmm... where have we seen this warning before :-> GO )
2153 */
2154 static int
2155 raidlock(rs)
2156 struct raid_softc *rs;
2157 {
2158 int error;
2159
2160 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2161 rs->sc_flags |= RAIDF_WANTED;
2162 if ((error =
2163 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2164 return (error);
2165 }
2166 rs->sc_flags |= RAIDF_LOCKED;
2167 return (0);
2168 }
2169 /*
2170 * Unlock and wake up any waiters.
2171 */
2172 static void
2173 raidunlock(rs)
2174 struct raid_softc *rs;
2175 {
2176
2177 rs->sc_flags &= ~RAIDF_LOCKED;
2178 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2179 rs->sc_flags &= ~RAIDF_WANTED;
2180 wakeup(rs);
2181 }
2182 }
2183
2184
/*
 * Placement of the component label as used by the label read/write
 * routines in this file: the label I/O covers RF_COMPONENT_INFO_SIZE
 * bytes and starts RF_COMPONENT_INFO_OFFSET bytes into the component.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		1024		/* bytes */
2187
2188 int
2189 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2190 {
2191 RF_ComponentLabel_t clabel;
2192 raidread_component_label(dev, b_vp, &clabel);
2193 clabel.mod_counter = mod_counter;
2194 clabel.clean = RF_RAID_CLEAN;
2195 raidwrite_component_label(dev, b_vp, &clabel);
2196 return(0);
2197 }
2198
2199
2200 int
2201 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2202 {
2203 RF_ComponentLabel_t clabel;
2204 raidread_component_label(dev, b_vp, &clabel);
2205 clabel.mod_counter = mod_counter;
2206 clabel.clean = RF_RAID_DIRTY;
2207 raidwrite_component_label(dev, b_vp, &clabel);
2208 return(0);
2209 }
2210
2211 /* ARGSUSED */
2212 int
2213 raidread_component_label(dev, b_vp, clabel)
2214 dev_t dev;
2215 struct vnode *b_vp;
2216 RF_ComponentLabel_t *clabel;
2217 {
2218 struct buf *bp;
2219 int error;
2220
2221 /* XXX should probably ensure that we don't try to do this if
2222 someone has changed rf_protected_sectors. */
2223
2224 if (b_vp == NULL) {
2225 /* For whatever reason, this component is not valid.
2226 Don't try to read a component label from it. */
2227 return(EINVAL);
2228 }
2229
2230 /* get a block of the appropriate size... */
2231 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2232 bp->b_dev = dev;
2233
2234 /* get our ducks in a row for the read */
2235 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2236 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2237 bp->b_flags |= B_READ;
2238 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2239
2240 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2241
2242 error = biowait(bp);
2243
2244 if (!error) {
2245 memcpy(clabel, bp->b_data,
2246 sizeof(RF_ComponentLabel_t));
2247 #if 0
2248 rf_print_component_label( clabel );
2249 #endif
2250 } else {
2251 #if 0
2252 printf("Failed to read RAID component label!\n");
2253 #endif
2254 }
2255
2256 brelse(bp);
2257 return(error);
2258 }
2259 /* ARGSUSED */
2260 int
2261 raidwrite_component_label(dev, b_vp, clabel)
2262 dev_t dev;
2263 struct vnode *b_vp;
2264 RF_ComponentLabel_t *clabel;
2265 {
2266 struct buf *bp;
2267 int error;
2268
2269 /* get a block of the appropriate size... */
2270 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2271 bp->b_dev = dev;
2272
2273 /* get our ducks in a row for the write */
2274 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2275 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2276 bp->b_flags |= B_WRITE;
2277 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2278
2279 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2280
2281 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2282
2283 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2284 error = biowait(bp);
2285 brelse(bp);
2286 if (error) {
2287 #if 1
2288 printf("Failed to write RAID component info!\n");
2289 #endif
2290 }
2291
2292 return(error);
2293 }
2294
2295 void
2296 rf_markalldirty(raidPtr)
2297 RF_Raid_t *raidPtr;
2298 {
2299 RF_ComponentLabel_t clabel;
2300 int r,c;
2301
2302 raidPtr->mod_counter++;
2303 for (r = 0; r < raidPtr->numRow; r++) {
2304 for (c = 0; c < raidPtr->numCol; c++) {
2305 /* we don't want to touch (at all) a disk that has
2306 failed */
2307 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2308 raidread_component_label(
2309 raidPtr->Disks[r][c].dev,
2310 raidPtr->raid_cinfo[r][c].ci_vp,
2311 &clabel);
2312 if (clabel.status == rf_ds_spared) {
2313 /* XXX do something special...
2314 but whatever you do, don't
2315 try to access it!! */
2316 } else {
2317 #if 0
2318 clabel.status =
2319 raidPtr->Disks[r][c].status;
2320 raidwrite_component_label(
2321 raidPtr->Disks[r][c].dev,
2322 raidPtr->raid_cinfo[r][c].ci_vp,
2323 &clabel);
2324 #endif
2325 raidmarkdirty(
2326 raidPtr->Disks[r][c].dev,
2327 raidPtr->raid_cinfo[r][c].ci_vp,
2328 raidPtr->mod_counter);
2329 }
2330 }
2331 }
2332 }
2333 /* printf("Component labels marked dirty.\n"); */
2334 #if 0
2335 for( c = 0; c < raidPtr->numSpare ; c++) {
2336 sparecol = raidPtr->numCol + c;
2337 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2338 /*
2339
2340 XXX this is where we get fancy and map this spare
2341 into it's correct spot in the array.
2342
2343 */
2344 /*
2345
2346 we claim this disk is "optimal" if it's
2347 rf_ds_used_spare, as that means it should be
2348 directly substitutable for the disk it replaced.
2349 We note that too...
2350
2351 */
2352
2353 for(i=0;i<raidPtr->numRow;i++) {
2354 for(j=0;j<raidPtr->numCol;j++) {
2355 if ((raidPtr->Disks[i][j].spareRow ==
2356 r) &&
2357 (raidPtr->Disks[i][j].spareCol ==
2358 sparecol)) {
2359 srow = r;
2360 scol = sparecol;
2361 break;
2362 }
2363 }
2364 }
2365
2366 raidread_component_label(
2367 raidPtr->Disks[r][sparecol].dev,
2368 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2369 &clabel);
2370 /* make sure status is noted */
2371 clabel.version = RF_COMPONENT_LABEL_VERSION;
2372 clabel.mod_counter = raidPtr->mod_counter;
2373 clabel.serial_number = raidPtr->serial_number;
2374 clabel.row = srow;
2375 clabel.column = scol;
2376 clabel.num_rows = raidPtr->numRow;
2377 clabel.num_columns = raidPtr->numCol;
2378 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2379 clabel.status = rf_ds_optimal;
2380 raidwrite_component_label(
2381 raidPtr->Disks[r][sparecol].dev,
2382 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2383 &clabel);
2384 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2385 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2386 }
2387 }
2388
2389 #endif
2390 }
2391
2392
2393 void
2394 rf_update_component_labels(raidPtr, final)
2395 RF_Raid_t *raidPtr;
2396 int final;
2397 {
2398 RF_ComponentLabel_t clabel;
2399 int sparecol;
2400 int r,c;
2401 int i,j;
2402 int srow, scol;
2403
2404 srow = -1;
2405 scol = -1;
2406
2407 /* XXX should do extra checks to make sure things really are clean,
2408 rather than blindly setting the clean bit... */
2409
2410 raidPtr->mod_counter++;
2411
2412 for (r = 0; r < raidPtr->numRow; r++) {
2413 for (c = 0; c < raidPtr->numCol; c++) {
2414 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2415 raidread_component_label(
2416 raidPtr->Disks[r][c].dev,
2417 raidPtr->raid_cinfo[r][c].ci_vp,
2418 &clabel);
2419 /* make sure status is noted */
2420 clabel.status = rf_ds_optimal;
2421 /* bump the counter */
2422 clabel.mod_counter = raidPtr->mod_counter;
2423
2424 raidwrite_component_label(
2425 raidPtr->Disks[r][c].dev,
2426 raidPtr->raid_cinfo[r][c].ci_vp,
2427 &clabel);
2428 if (final == RF_FINAL_COMPONENT_UPDATE) {
2429 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2430 raidmarkclean(
2431 raidPtr->Disks[r][c].dev,
2432 raidPtr->raid_cinfo[r][c].ci_vp,
2433 raidPtr->mod_counter);
2434 }
2435 }
2436 }
2437 /* else we don't touch it.. */
2438 }
2439 }
2440
2441 for( c = 0; c < raidPtr->numSpare ; c++) {
2442 sparecol = raidPtr->numCol + c;
2443 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2444 /*
2445
2446 we claim this disk is "optimal" if it's
2447 rf_ds_used_spare, as that means it should be
2448 directly substitutable for the disk it replaced.
2449 We note that too...
2450
2451 */
2452
2453 for(i=0;i<raidPtr->numRow;i++) {
2454 for(j=0;j<raidPtr->numCol;j++) {
2455 if ((raidPtr->Disks[i][j].spareRow ==
2456 0) &&
2457 (raidPtr->Disks[i][j].spareCol ==
2458 sparecol)) {
2459 srow = i;
2460 scol = j;
2461 break;
2462 }
2463 }
2464 }
2465
2466 /* XXX shouldn't *really* need this... */
2467 raidread_component_label(
2468 raidPtr->Disks[0][sparecol].dev,
2469 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2470 &clabel);
2471 /* make sure status is noted */
2472
2473 raid_init_component_label(raidPtr, &clabel);
2474
2475 clabel.mod_counter = raidPtr->mod_counter;
2476 clabel.row = srow;
2477 clabel.column = scol;
2478 clabel.status = rf_ds_optimal;
2479
2480 raidwrite_component_label(
2481 raidPtr->Disks[0][sparecol].dev,
2482 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2483 &clabel);
2484 if (final == RF_FINAL_COMPONENT_UPDATE) {
2485 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2486 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2487 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2488 raidPtr->mod_counter);
2489 }
2490 }
2491 }
2492 }
2493 /* printf("Component labels updated\n"); */
2494 }
2495
2496 void
2497 rf_close_component(raidPtr, vp, auto_configured)
2498 RF_Raid_t *raidPtr;
2499 struct vnode *vp;
2500 int auto_configured;
2501 {
2502 struct proc *p;
2503
2504 p = raidPtr->engine_thread;
2505
2506 if (vp != NULL) {
2507 if (auto_configured == 1) {
2508 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2509 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2510 vput(vp);
2511
2512 } else {
2513 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2514 }
2515 } else {
2516 printf("vnode was NULL\n");
2517 }
2518 }
2519
2520
2521 void
2522 rf_UnconfigureVnodes(raidPtr)
2523 RF_Raid_t *raidPtr;
2524 {
2525 int r,c;
2526 struct proc *p;
2527 struct vnode *vp;
2528 int acd;
2529
2530
2531 /* We take this opportunity to close the vnodes like we should.. */
2532
2533 p = raidPtr->engine_thread;
2534
2535 for (r = 0; r < raidPtr->numRow; r++) {
2536 for (c = 0; c < raidPtr->numCol; c++) {
2537 printf("Closing vnode for row: %d col: %d\n", r, c);
2538 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2539 acd = raidPtr->Disks[r][c].auto_configured;
2540 rf_close_component(raidPtr, vp, acd);
2541 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2542 raidPtr->Disks[r][c].auto_configured = 0;
2543 }
2544 }
2545 for (r = 0; r < raidPtr->numSpare; r++) {
2546 printf("Closing vnode for spare: %d\n", r);
2547 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2548 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2549 rf_close_component(raidPtr, vp, acd);
2550 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2551 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2552 }
2553 }
2554
2555
2556 void
2557 rf_ReconThread(req)
2558 struct rf_recon_req *req;
2559 {
2560 int s;
2561 RF_Raid_t *raidPtr;
2562
2563 s = splbio();
2564 raidPtr = (RF_Raid_t *) req->raidPtr;
2565 raidPtr->recon_in_progress = 1;
2566
2567 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2568 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2569
2570 /* XXX get rid of this! we don't need it at all.. */
2571 RF_Free(req, sizeof(*req));
2572
2573 raidPtr->recon_in_progress = 0;
2574 splx(s);
2575
2576 /* That's all... */
2577 kthread_exit(0); /* does not return */
2578 }
2579
2580 void
2581 rf_RewriteParityThread(raidPtr)
2582 RF_Raid_t *raidPtr;
2583 {
2584 int retcode;
2585 int s;
2586
2587 raidPtr->parity_rewrite_in_progress = 1;
2588 s = splbio();
2589 retcode = rf_RewriteParity(raidPtr);
2590 splx(s);
2591 if (retcode) {
2592 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2593 } else {
2594 /* set the clean bit! If we shutdown correctly,
2595 the clean bit on each component label will get
2596 set */
2597 raidPtr->parity_good = RF_RAID_CLEAN;
2598 }
2599 raidPtr->parity_rewrite_in_progress = 0;
2600
2601 /* Anyone waiting for us to stop? If so, inform them... */
2602 if (raidPtr->waitShutdown) {
2603 wakeup(&raidPtr->parity_rewrite_in_progress);
2604 }
2605
2606 /* That's all... */
2607 kthread_exit(0); /* does not return */
2608 }
2609
2610
2611 void
2612 rf_CopybackThread(raidPtr)
2613 RF_Raid_t *raidPtr;
2614 {
2615 int s;
2616
2617 raidPtr->copyback_in_progress = 1;
2618 s = splbio();
2619 rf_CopybackReconstructedData(raidPtr);
2620 splx(s);
2621 raidPtr->copyback_in_progress = 0;
2622
2623 /* That's all... */
2624 kthread_exit(0); /* does not return */
2625 }
2626
2627
2628 void
2629 rf_ReconstructInPlaceThread(req)
2630 struct rf_recon_req *req;
2631 {
2632 int retcode;
2633 int s;
2634 RF_Raid_t *raidPtr;
2635
2636 s = splbio();
2637 raidPtr = req->raidPtr;
2638 raidPtr->recon_in_progress = 1;
2639 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2640 RF_Free(req, sizeof(*req));
2641 raidPtr->recon_in_progress = 0;
2642 splx(s);
2643
2644 /* That's all... */
2645 kthread_exit(0); /* does not return */
2646 }
2647
/*
 * Mount-root hook.  Takes the device it is called for and does nothing.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* intentionally empty */
}
2654
2655
2656 RF_AutoConfig_t *
2657 rf_find_raid_components()
2658 {
2659 struct devnametobdevmaj *dtobdm;
2660 struct vnode *vp;
2661 struct disklabel label;
2662 struct device *dv;
2663 char *cd_name;
2664 dev_t dev;
2665 int error;
2666 int i;
2667 int good_one;
2668 RF_ComponentLabel_t *clabel;
2669 RF_AutoConfig_t *ac_list;
2670 RF_AutoConfig_t *ac;
2671
2672
2673 /* initialize the AutoConfig list */
2674 ac_list = NULL;
2675
2676 /* we begin by trolling through *all* the devices on the system */
2677
2678 for (dv = alldevs.tqh_first; dv != NULL;
2679 dv = dv->dv_list.tqe_next) {
2680
2681 /* we are only interested in disks... */
2682 if (dv->dv_class != DV_DISK)
2683 continue;
2684
2685 /* we don't care about floppies... */
2686 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2687 continue;
2688 }
2689
2690 /* need to find the device_name_to_block_device_major stuff */
2691 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2692 dtobdm = dev_name2blk;
2693 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2694 dtobdm++;
2695 }
2696
2697 /* get a vnode for the raw partition of this disk */
2698
2699 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2700 if (bdevvp(dev, &vp))
2701 panic("RAID can't alloc vnode");
2702
2703 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2704
2705 if (error) {
2706 /* "Who cares." Continue looking
2707 for something that exists*/
2708 vput(vp);
2709 continue;
2710 }
2711
2712 /* Ok, the disk exists. Go get the disklabel. */
2713 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2714 FREAD, NOCRED, 0);
2715 if (error) {
2716 /*
2717 * XXX can't happen - open() would
2718 * have errored out (or faked up one)
2719 */
2720 printf("can't get label for dev %s%c (%d)!?!?\n",
2721 dv->dv_xname, 'a' + RAW_PART, error);
2722 }
2723
2724 /* don't need this any more. We'll allocate it again
2725 a little later if we really do... */
2726 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2727 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2728 vput(vp);
2729
2730 for (i=0; i < label.d_npartitions; i++) {
2731 /* We only support partitions marked as RAID */
2732 if (label.d_partitions[i].p_fstype != FS_RAID)
2733 continue;
2734
2735 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2736 if (bdevvp(dev, &vp))
2737 panic("RAID can't alloc vnode");
2738
2739 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2740 if (error) {
2741 /* Whatever... */
2742 vput(vp);
2743 continue;
2744 }
2745
2746 good_one = 0;
2747
2748 clabel = (RF_ComponentLabel_t *)
2749 malloc(sizeof(RF_ComponentLabel_t),
2750 M_RAIDFRAME, M_NOWAIT);
2751 if (clabel == NULL) {
2752 /* XXX CLEANUP HERE */
2753 printf("RAID auto config: out of memory!\n");
2754 return(NULL); /* XXX probably should panic? */
2755 }
2756
2757 if (!raidread_component_label(dev, vp, clabel)) {
2758 /* Got the label. Does it look reasonable? */
2759 if (rf_reasonable_label(clabel) &&
2760 (clabel->partitionSize <=
2761 label.d_partitions[i].p_size)) {
2762 #if DEBUG
2763 printf("Component on: %s%c: %d\n",
2764 dv->dv_xname, 'a'+i,
2765 label.d_partitions[i].p_size);
2766 rf_print_component_label(clabel);
2767 #endif
2768 /* if it's reasonable, add it,
2769 else ignore it. */
2770 ac = (RF_AutoConfig_t *)
2771 malloc(sizeof(RF_AutoConfig_t),
2772 M_RAIDFRAME,
2773 M_NOWAIT);
2774 if (ac == NULL) {
2775 /* XXX should panic?? */
2776 return(NULL);
2777 }
2778
2779 sprintf(ac->devname, "%s%c",
2780 dv->dv_xname, 'a'+i);
2781 ac->dev = dev;
2782 ac->vp = vp;
2783 ac->clabel = clabel;
2784 ac->next = ac_list;
2785 ac_list = ac;
2786 good_one = 1;
2787 }
2788 }
2789 if (!good_one) {
2790 /* cleanup */
2791 free(clabel, M_RAIDFRAME);
2792 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2793 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2794 vput(vp);
2795 }
2796 }
2797 }
2798 return(ac_list);
2799 }
2800
2801 static int
2802 rf_reasonable_label(clabel)
2803 RF_ComponentLabel_t *clabel;
2804 {
2805
2806 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2807 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2808 ((clabel->clean == RF_RAID_CLEAN) ||
2809 (clabel->clean == RF_RAID_DIRTY)) &&
2810 clabel->row >=0 &&
2811 clabel->column >= 0 &&
2812 clabel->num_rows > 0 &&
2813 clabel->num_columns > 0 &&
2814 clabel->row < clabel->num_rows &&
2815 clabel->column < clabel->num_columns &&
2816 clabel->blockSize > 0 &&
2817 clabel->numBlocks > 0) {
2818 /* label looks reasonable enough... */
2819 return(1);
2820 }
2821 return(0);
2822 }
2823
2824
2825 void
2826 rf_print_component_label(clabel)
2827 RF_ComponentLabel_t *clabel;
2828 {
2829 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2830 clabel->row, clabel->column,
2831 clabel->num_rows, clabel->num_columns);
2832 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2833 clabel->version, clabel->serial_number,
2834 clabel->mod_counter);
2835 printf(" Clean: %s Status: %d\n",
2836 clabel->clean ? "Yes" : "No", clabel->status );
2837 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2838 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2839 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2840 (char) clabel->parityConfig, clabel->blockSize,
2841 clabel->numBlocks);
2842 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2843 printf(" Contains root partition: %s\n",
2844 clabel->root_partition ? "Yes" : "No" );
2845 printf(" Last configured as: raid%d\n", clabel->last_unit );
2846 #if 0
2847 printf(" Config order: %d\n", clabel->config_order);
2848 #endif
2849
2850 }
2851
2852 RF_ConfigSet_t *
2853 rf_create_auto_sets(ac_list)
2854 RF_AutoConfig_t *ac_list;
2855 {
2856 RF_AutoConfig_t *ac;
2857 RF_ConfigSet_t *config_sets;
2858 RF_ConfigSet_t *cset;
2859 RF_AutoConfig_t *ac_next;
2860
2861
2862 config_sets = NULL;
2863
2864 /* Go through the AutoConfig list, and figure out which components
2865 belong to what sets. */
2866 ac = ac_list;
2867 while(ac!=NULL) {
2868 /* we're going to putz with ac->next, so save it here
2869 for use at the end of the loop */
2870 ac_next = ac->next;
2871
2872 if (config_sets == NULL) {
2873 /* will need at least this one... */
2874 config_sets = (RF_ConfigSet_t *)
2875 malloc(sizeof(RF_ConfigSet_t),
2876 M_RAIDFRAME, M_NOWAIT);
2877 if (config_sets == NULL) {
2878 panic("rf_create_auto_sets: No memory!\n");
2879 }
2880 /* this one is easy :) */
2881 config_sets->ac = ac;
2882 config_sets->next = NULL;
2883 config_sets->rootable = 0;
2884 ac->next = NULL;
2885 } else {
2886 /* which set does this component fit into? */
2887 cset = config_sets;
2888 while(cset!=NULL) {
2889 if (rf_does_it_fit(cset, ac)) {
2890 /* looks like it matches... */
2891 ac->next = cset->ac;
2892 cset->ac = ac;
2893 break;
2894 }
2895 cset = cset->next;
2896 }
2897 if (cset==NULL) {
2898 /* didn't find a match above... new set..*/
2899 cset = (RF_ConfigSet_t *)
2900 malloc(sizeof(RF_ConfigSet_t),
2901 M_RAIDFRAME, M_NOWAIT);
2902 if (cset == NULL) {
2903 panic("rf_create_auto_sets: No memory!\n");
2904 }
2905 cset->ac = ac;
2906 ac->next = NULL;
2907 cset->next = config_sets;
2908 cset->rootable = 0;
2909 config_sets = cset;
2910 }
2911 }
2912 ac = ac_next;
2913 }
2914
2915
2916 return(config_sets);
2917 }
2918
2919 static int
2920 rf_does_it_fit(cset, ac)
2921 RF_ConfigSet_t *cset;
2922 RF_AutoConfig_t *ac;
2923 {
2924 RF_ComponentLabel_t *clabel1, *clabel2;
2925
2926 /* If this one matches the *first* one in the set, that's good
2927 enough, since the other members of the set would have been
2928 through here too... */
2929 /* note that we are not checking partitionSize here..
2930
2931 Note that we are also not checking the mod_counters here.
2932 If everything else matches execpt the mod_counter, that's
2933 good enough for this test. We will deal with the mod_counters
2934 a little later in the autoconfiguration process.
2935
2936 (clabel1->mod_counter == clabel2->mod_counter) &&
2937
2938 The reason we don't check for this is that failed disks
2939 will have lower modification counts. If those disks are
2940 not added to the set they used to belong to, then they will
2941 form their own set, which may result in 2 different sets,
2942 for example, competing to be configured at raid0, and
2943 perhaps competing to be the root filesystem set. If the
2944 wrong ones get configured, or both attempt to become /,
2945 weird behaviour and or serious lossage will occur. Thus we
2946 need to bring them into the fold here, and kick them out at
2947 a later point.
2948
2949 */
2950
2951 clabel1 = cset->ac->clabel;
2952 clabel2 = ac->clabel;
2953 if ((clabel1->version == clabel2->version) &&
2954 (clabel1->serial_number == clabel2->serial_number) &&
2955 (clabel1->num_rows == clabel2->num_rows) &&
2956 (clabel1->num_columns == clabel2->num_columns) &&
2957 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2958 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2959 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2960 (clabel1->parityConfig == clabel2->parityConfig) &&
2961 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2962 (clabel1->blockSize == clabel2->blockSize) &&
2963 (clabel1->numBlocks == clabel2->numBlocks) &&
2964 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2965 (clabel1->root_partition == clabel2->root_partition) &&
2966 (clabel1->last_unit == clabel2->last_unit) &&
2967 (clabel1->config_order == clabel2->config_order)) {
2968 /* if it get's here, it almost *has* to be a match */
2969 } else {
2970 /* it's not consistent with somebody in the set..
2971 punt */
2972 return(0);
2973 }
2974 /* all was fine.. it must fit... */
2975 return(1);
2976 }
2977
2978 int
2979 rf_have_enough_components(cset)
2980 RF_ConfigSet_t *cset;
2981 {
2982 RF_AutoConfig_t *ac;
2983 RF_AutoConfig_t *auto_config;
2984 RF_ComponentLabel_t *clabel;
2985 int r,c;
2986 int num_rows;
2987 int num_cols;
2988 int num_missing;
2989 int mod_counter;
2990 int mod_counter_found;
2991 int even_pair_failed;
2992 char parity_type;
2993
2994
2995 /* check to see that we have enough 'live' components
2996 of this set. If so, we can configure it if necessary */
2997
2998 num_rows = cset->ac->clabel->num_rows;
2999 num_cols = cset->ac->clabel->num_columns;
3000 parity_type = cset->ac->clabel->parityConfig;
3001
3002 /* XXX Check for duplicate components!?!?!? */
3003
3004 /* Determine what the mod_counter is supposed to be for this set. */
3005
3006 mod_counter_found = 0;
3007 mod_counter = 0;
3008 ac = cset->ac;
3009 while(ac!=NULL) {
3010 if (mod_counter_found==0) {
3011 mod_counter = ac->clabel->mod_counter;
3012 mod_counter_found = 1;
3013 } else {
3014 if (ac->clabel->mod_counter > mod_counter) {
3015 mod_counter = ac->clabel->mod_counter;
3016 }
3017 }
3018 ac = ac->next;
3019 }
3020
3021 num_missing = 0;
3022 auto_config = cset->ac;
3023
3024 for(r=0; r<num_rows; r++) {
3025 even_pair_failed = 0;
3026 for(c=0; c<num_cols; c++) {
3027 ac = auto_config;
3028 while(ac!=NULL) {
3029 if ((ac->clabel->row == r) &&
3030 (ac->clabel->column == c) &&
3031 (ac->clabel->mod_counter == mod_counter)) {
3032 /* it's this one... */
3033 #if DEBUG
3034 printf("Found: %s at %d,%d\n",
3035 ac->devname,r,c);
3036 #endif
3037 break;
3038 }
3039 ac=ac->next;
3040 }
3041 if (ac==NULL) {
3042 /* Didn't find one here! */
3043 /* special case for RAID 1, especially
3044 where there are more than 2
3045 components (where RAIDframe treats
3046 things a little differently :( ) */
3047 if (parity_type == '1') {
3048 if (c%2 == 0) { /* even component */
3049 even_pair_failed = 1;
3050 } else { /* odd component. If
3051 we're failed, and
3052 so is the even
3053 component, it's
3054 "Good Night, Charlie" */
3055 if (even_pair_failed == 1) {
3056 return(0);
3057 }
3058 }
3059 } else {
3060 /* normal accounting */
3061 num_missing++;
3062 }
3063 }
3064 if ((parity_type == '1') && (c%2 == 1)) {
3065 /* Just did an even component, and we didn't
3066 bail.. reset the even_pair_failed flag,
3067 and go on to the next component.... */
3068 even_pair_failed = 0;
3069 }
3070 }
3071 }
3072
3073 clabel = cset->ac->clabel;
3074
3075 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3076 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3077 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3078 /* XXX this needs to be made *much* more general */
3079 /* Too many failures */
3080 return(0);
3081 }
3082 /* otherwise, all is well, and we've got enough to take a kick
3083 at autoconfiguring this set */
3084 return(1);
3085 }
3086
3087 void
3088 rf_create_configuration(ac,config,raidPtr)
3089 RF_AutoConfig_t *ac;
3090 RF_Config_t *config;
3091 RF_Raid_t *raidPtr;
3092 {
3093 RF_ComponentLabel_t *clabel;
3094 int i;
3095
3096 clabel = ac->clabel;
3097
3098 /* 1. Fill in the common stuff */
3099 config->numRow = clabel->num_rows;
3100 config->numCol = clabel->num_columns;
3101 config->numSpare = 0; /* XXX should this be set here? */
3102 config->sectPerSU = clabel->sectPerSU;
3103 config->SUsPerPU = clabel->SUsPerPU;
3104 config->SUsPerRU = clabel->SUsPerRU;
3105 config->parityConfig = clabel->parityConfig;
3106 /* XXX... */
3107 strcpy(config->diskQueueType,"fifo");
3108 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3109 config->layoutSpecificSize = 0; /* XXX ?? */
3110
3111 while(ac!=NULL) {
3112 /* row/col values will be in range due to the checks
3113 in reasonable_label() */
3114 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3115 ac->devname);
3116 ac = ac->next;
3117 }
3118
3119 for(i=0;i<RF_MAXDBGV;i++) {
3120 config->debugVars[i][0] = NULL;
3121 }
3122 }
3123
3124 int
3125 rf_set_autoconfig(raidPtr, new_value)
3126 RF_Raid_t *raidPtr;
3127 int new_value;
3128 {
3129 RF_ComponentLabel_t clabel;
3130 struct vnode *vp;
3131 dev_t dev;
3132 int row, column;
3133
3134 raidPtr->autoconfigure = new_value;
3135 for(row=0; row<raidPtr->numRow; row++) {
3136 for(column=0; column<raidPtr->numCol; column++) {
3137 if (raidPtr->Disks[row][column].status ==
3138 rf_ds_optimal) {
3139 dev = raidPtr->Disks[row][column].dev;
3140 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3141 raidread_component_label(dev, vp, &clabel);
3142 clabel.autoconfigure = new_value;
3143 raidwrite_component_label(dev, vp, &clabel);
3144 }
3145 }
3146 }
3147 return(new_value);
3148 }
3149
3150 int
3151 rf_set_rootpartition(raidPtr, new_value)
3152 RF_Raid_t *raidPtr;
3153 int new_value;
3154 {
3155 RF_ComponentLabel_t clabel;
3156 struct vnode *vp;
3157 dev_t dev;
3158 int row, column;
3159
3160 raidPtr->root_partition = new_value;
3161 for(row=0; row<raidPtr->numRow; row++) {
3162 for(column=0; column<raidPtr->numCol; column++) {
3163 if (raidPtr->Disks[row][column].status ==
3164 rf_ds_optimal) {
3165 dev = raidPtr->Disks[row][column].dev;
3166 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3167 raidread_component_label(dev, vp, &clabel);
3168 clabel.root_partition = new_value;
3169 raidwrite_component_label(dev, vp, &clabel);
3170 }
3171 }
3172 }
3173 return(new_value);
3174 }
3175
3176 void
3177 rf_release_all_vps(cset)
3178 RF_ConfigSet_t *cset;
3179 {
3180 RF_AutoConfig_t *ac;
3181
3182 ac = cset->ac;
3183 while(ac!=NULL) {
3184 /* Close the vp, and give it back */
3185 if (ac->vp) {
3186 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3187 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3188 vput(ac->vp);
3189 ac->vp = NULL;
3190 }
3191 ac = ac->next;
3192 }
3193 }
3194
3195
3196 void
3197 rf_cleanup_config_set(cset)
3198 RF_ConfigSet_t *cset;
3199 {
3200 RF_AutoConfig_t *ac;
3201 RF_AutoConfig_t *next_ac;
3202
3203 ac = cset->ac;
3204 while(ac!=NULL) {
3205 next_ac = ac->next;
3206 /* nuke the label */
3207 free(ac->clabel, M_RAIDFRAME);
3208 /* cleanup the config structure */
3209 free(ac, M_RAIDFRAME);
3210 /* "next.." */
3211 ac = next_ac;
3212 }
3213 /* and, finally, nuke the config set */
3214 free(cset, M_RAIDFRAME);
3215 }
3216
3217
3218 void
3219 raid_init_component_label(raidPtr, clabel)
3220 RF_Raid_t *raidPtr;
3221 RF_ComponentLabel_t *clabel;
3222 {
3223 /* current version number */
3224 clabel->version = RF_COMPONENT_LABEL_VERSION;
3225 clabel->serial_number = raidPtr->serial_number;
3226 clabel->mod_counter = raidPtr->mod_counter;
3227 clabel->num_rows = raidPtr->numRow;
3228 clabel->num_columns = raidPtr->numCol;
3229 clabel->clean = RF_RAID_DIRTY; /* not clean */
3230 clabel->status = rf_ds_optimal; /* "It's good!" */
3231
3232 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3233 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3234 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3235
3236 clabel->blockSize = raidPtr->bytesPerSector;
3237 clabel->numBlocks = raidPtr->sectorsPerDisk;
3238
3239 /* XXX not portable */
3240 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3241 clabel->maxOutstanding = raidPtr->maxOutstanding;
3242 clabel->autoconfigure = raidPtr->autoconfigure;
3243 clabel->root_partition = raidPtr->root_partition;
3244 clabel->last_unit = raidPtr->raidid;
3245 clabel->config_order = raidPtr->config_order;
3246 }
3247
3248 int
3249 rf_auto_config_set(cset,unit)
3250 RF_ConfigSet_t *cset;
3251 int *unit;
3252 {
3253 RF_Raid_t *raidPtr;
3254 RF_Config_t *config;
3255 int raidID;
3256 int retcode;
3257
3258 printf("RAID autoconfigure\n");
3259
3260 retcode = 0;
3261 *unit = -1;
3262
3263 /* 1. Create a config structure */
3264
3265 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3266 M_RAIDFRAME,
3267 M_NOWAIT);
3268 if (config==NULL) {
3269 printf("Out of mem!?!?\n");
3270 /* XXX do something more intelligent here. */
3271 return(1);
3272 }
3273
3274 memset(config, 0, sizeof(RF_Config_t));
3275
3276 /* XXX raidID needs to be set correctly.. */
3277
3278 /*
3279 2. Figure out what RAID ID this one is supposed to live at
3280 See if we can get the same RAID dev that it was configured
3281 on last time..
3282 */
3283
3284 raidID = cset->ac->clabel->last_unit;
3285 if ((raidID < 0) || (raidID >= numraid)) {
3286 /* let's not wander off into lala land. */
3287 raidID = numraid - 1;
3288 }
3289 if (raidPtrs[raidID]->valid != 0) {
3290
3291 /*
3292 Nope... Go looking for an alternative...
3293 Start high so we don't immediately use raid0 if that's
3294 not taken.
3295 */
3296
3297 for(raidID = numraid; raidID >= 0; raidID--) {
3298 if (raidPtrs[raidID]->valid == 0) {
3299 /* can use this one! */
3300 break;
3301 }
3302 }
3303 }
3304
3305 if (raidID < 0) {
3306 /* punt... */
3307 printf("Unable to auto configure this set!\n");
3308 printf("(Out of RAID devs!)\n");
3309 return(1);
3310 }
3311 printf("Configuring raid%d:\n",raidID);
3312 raidPtr = raidPtrs[raidID];
3313
3314 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3315 raidPtr->raidid = raidID;
3316 raidPtr->openings = RAIDOUTSTANDING;
3317
3318 /* 3. Build the configuration structure */
3319 rf_create_configuration(cset->ac, config, raidPtr);
3320
3321 /* 4. Do the configuration */
3322 retcode = rf_Configure(raidPtr, config, cset->ac);
3323
3324 if (retcode == 0) {
3325
3326 raidinit(raidPtrs[raidID]);
3327
3328 rf_markalldirty(raidPtrs[raidID]);
3329 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3330 if (cset->ac->clabel->root_partition==1) {
3331 /* everything configured just fine. Make a note
3332 that this set is eligible to be root. */
3333 cset->rootable = 1;
3334 /* XXX do this here? */
3335 raidPtrs[raidID]->root_partition = 1;
3336 }
3337 }
3338
3339 /* 5. Cleanup */
3340 free(config, M_RAIDFRAME);
3341
3342 *unit = raidID;
3343 return(retcode);
3344 }
3345
3346 void
3347 rf_disk_unbusy(desc)
3348 RF_RaidAccessDesc_t *desc;
3349 {
3350 struct buf *bp;
3351
3352 bp = (struct buf *)desc->bp;
3353 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3354 (bp->b_bcount - bp->b_resid));
3355 }
3356