1 /* $NetBSD: rf_netbsdkintf.c,v 1.104.2.12 2002/07/12 01:40:10 nathanw Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.12 2002/07/12 01:40:10 nathanw Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/proc.h>
123 #include <sys/queue.h>
124 #include <sys/disk.h>
125 #include <sys/device.h>
126 #include <sys/stat.h>
127 #include <sys/ioctl.h>
128 #include <sys/fcntl.h>
129 #include <sys/systm.h>
130 #include <sys/namei.h>
131 #include <sys/vnode.h>
132 #include <sys/disklabel.h>
133 #include <sys/conf.h>
134 #include <sys/lock.h>
135 #include <sys/buf.h>
136 #include <sys/user.h>
137 #include <sys/reboot.h>
138
139 #include <dev/raidframe/raidframevar.h>
140 #include <dev/raidframe/raidframeio.h>
141 #include "raid.h"
142 #include "opt_raid_autoconfig.h"
143 #include "rf_raid.h"
144 #include "rf_copyback.h"
145 #include "rf_dag.h"
146 #include "rf_dagflags.h"
147 #include "rf_desc.h"
148 #include "rf_diskqueue.h"
149 #include "rf_acctrace.h"
150 #include "rf_etimer.h"
151 #include "rf_general.h"
152 #include "rf_debugMem.h"
153 #include "rf_kintf.h"
154 #include "rf_options.h"
155 #include "rf_driver.h"
156 #include "rf_parityscan.h"
157 #include "rf_debugprint.h"
158 #include "rf_threadstuff.h"
159
160 int rf_kdebug_level = 0;
161
162 #ifdef DEBUG
163 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
164 #else /* DEBUG */
165 #define db1_printf(a) { }
166 #endif /* DEBUG */
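/*
 * For reference: db1_printf() takes a single, fully parenthesized
 * argument list, so calls are written with doubled parentheses, as in
 * this example taken from raidopen() later in this file:
 *
 *	db1_printf(("Opening raid device number: %d partition: %d\n",
 *	    unit, part));
 */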
167
168 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
169
170 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
171
172 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173 * spare table */
174 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175 * installation process */
176
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf * bp);
179 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
180 dev_t dev, RF_SectorNum_t startSect,
181 RF_SectorCount_t numSect, caddr_t buf,
182 void (*cbFunc) (struct buf *), void *cbArg,
183 int logBytesPerSector, struct proc * b_proc);
184 static void raidinit(RF_Raid_t *);
185
186 void raidattach(int);
187 int raidsize(dev_t);
188 int raidopen(dev_t, int, int, struct proc *);
189 int raidclose(dev_t, int, int, struct proc *);
190 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
191 int raidwrite(dev_t, struct uio *, int);
192 int raidread(dev_t, struct uio *, int);
193 void raidstrategy(struct buf *);
194 int raiddump(dev_t, daddr_t, caddr_t, size_t);
195
196 /*
197 * Pilfered from ccd.c
198 */
199
200 struct raidbuf {
201 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
202 struct buf *rf_obp; /* ptr. to original I/O buf */
203 int rf_flags; /* misc. flags */
204 RF_DiskQueueData_t *req;/* the request that this was part of.. */
205 };
206
207 /* component buffer pool */
208 struct pool raidframe_cbufpool;
209
210 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
211 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
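/*
 * Because rf_buf is the first member of struct raidbuf, the struct buf
 * handed to the I/O completion callback can simply be cast back to the
 * containing struct raidbuf to recover the original buf and the
 * originating request.  A minimal sketch of that recovery, which is
 * what KernelWakeupFunc() below does:
 *
 *	struct raidbuf *raidbp = (struct raidbuf *) vbp;
 *	struct buf *obp = raidbp->rf_obp;
 *	RF_DiskQueueData_t *req = raidbp->req;
 */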
212
213 /* XXX Not sure if the following should be replacing the raidPtrs above,
214 or if it should be used in conjunction with that...
215 */
216
217 struct raid_softc {
218 int sc_flags; /* flags */
219 int sc_cflags; /* configuration flags */
220 size_t sc_size; /* size of the raid device */
221 char sc_xname[20]; /* XXX external name */
222 struct disk sc_dkdev; /* generic disk device info */
223 struct buf_queue buf_queue; /* used for the device queue */
224 };
225 /* sc_flags */
226 #define RAIDF_INITED 0x01 /* unit has been initialized */
227 #define RAIDF_WLABEL 0x02 /* label area is writable */
228 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
229 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
230 #define RAIDF_LOCKED 0x80 /* unit is locked */
231
232 #define raidunit(x) DISKUNIT(x)
233 int numraid = 0;
234
235 /*
236 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
237 * Be aware that large numbers can allow the driver to consume a lot of
238 * kernel memory, especially on writes, and in degraded mode reads.
239 *
240 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
241 * a single 64K write will typically require 64K for the old data,
242 * 64K for the old parity, and 64K for the new parity, for a total
243 * of 192K (if the parity buffer is not re-used immediately).
244 * Even if it is used immediately, that's still 128K, which when multiplied
245 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
246 *
247 * Now in degraded mode, for example, a 64K read on the above setup may
248 * require data reconstruction, which will require *all* of the 4 remaining
249 * disks to participate -- 4 * 32K/disk == 128K again.
250 */
251
252 #ifndef RAIDOUTSTANDING
253 #define RAIDOUTSTANDING 6
254 #endif
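/*
 * A worked version of the arithmetic above (stripe width of 64 blocks,
 * i.e. 32k per disk, 5 disks, 64K writes):
 *
 *	scratch per write = 64K old data + 64K old parity + 64K new parity
 *	                  = 192K (128K if the parity buffer is re-used)
 *	10 outstanding writes => ~10 * 128K = 1280K of scratch space,
 *	plus 10 * 64K = 640K of incoming data.
 */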
255
256 #define RAIDLABELDEV(dev) \
257 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
258
259 /* declared here, and made public, for the benefit of KVM stuff.. */
260 struct raid_softc *raid_softc;
261
262 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
263 struct disklabel *);
264 static void raidgetdisklabel(dev_t);
265 static void raidmakedisklabel(struct raid_softc *);
266
267 static int raidlock(struct raid_softc *);
268 static void raidunlock(struct raid_softc *);
269
270 static void rf_markalldirty(RF_Raid_t *);
271 void rf_mountroot_hook(struct device *);
272
273 struct device *raidrootdev;
274
275 void rf_ReconThread(struct rf_recon_req *);
276 /* XXX what I want is: */
277 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
278 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
279 void rf_CopybackThread(RF_Raid_t *raidPtr);
280 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
281 void rf_buildroothack(void *);
282
283 RF_AutoConfig_t *rf_find_raid_components(void);
284 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
285 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
286 static int rf_reasonable_label(RF_ComponentLabel_t *);
287 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
288 int rf_set_autoconfig(RF_Raid_t *, int);
289 int rf_set_rootpartition(RF_Raid_t *, int);
290 void rf_release_all_vps(RF_ConfigSet_t *);
291 void rf_cleanup_config_set(RF_ConfigSet_t *);
292 int rf_have_enough_components(RF_ConfigSet_t *);
293 int rf_auto_config_set(RF_ConfigSet_t *, int *);
294
295 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
296 allow autoconfig to take place.
297 Note that this is overridden by having
298 RAID_AUTOCONFIG as an option in the
299 kernel config file. */
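/*
 * For reference, the RAID_AUTOCONFIG option is enabled with a line like
 * the following in the kernel configuration file (the standard kernel
 * "options" syntax):
 *
 *	options 	RAID_AUTOCONFIG
 */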
300
301 void
302 raidattach(num)
303 int num;
304 {
305 int raidID;
306 int i, rc;
307 RF_AutoConfig_t *ac_list; /* autoconfig list */
308 RF_ConfigSet_t *config_sets;
309
310 #ifdef DEBUG
311 printf("raidattach: Asked for %d units\n", num);
312 #endif
313
314 if (num <= 0) {
315 #ifdef DIAGNOSTIC
316 panic("raidattach: count <= 0");
317 #endif
318 return;
319 }
320 /* This is where all the initialization stuff gets done. */
321
322 numraid = num;
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!\n");
329 }
330
331 /* Initialize the component buffer pool. */
332 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
333 0, 0, "raidpl", NULL);
334
335 rc = rf_mutex_init(&rf_sparet_wait_mutex);
336 if (rc) {
337 RF_PANIC();
338 }
339
340 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
341
342 for (i = 0; i < num; i++)
343 raidPtrs[i] = NULL;
344 rc = rf_BootRaidframe();
345 if (rc == 0)
346 printf("Kernelized RAIDframe activated\n");
347 else
348 panic("Serious error booting RAID!!\n");
349
350 /* put together some datastructures like the CCD device does.. This
351 * lets us lock the device and what-not when it gets opened. */
352
353 raid_softc = (struct raid_softc *)
354 malloc(num * sizeof(struct raid_softc),
355 M_RAIDFRAME, M_NOWAIT);
356 if (raid_softc == NULL) {
357 printf("WARNING: no memory for RAIDframe driver\n");
358 return;
359 }
360
361 memset(raid_softc, 0, num * sizeof(struct raid_softc));
362
363 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
364 M_RAIDFRAME, M_NOWAIT);
365 if (raidrootdev == NULL) {
366 panic("No memory for RAIDframe driver!!?!?!\n");
367 }
368
369 for (raidID = 0; raidID < num; raidID++) {
370 BUFQ_INIT(&raid_softc[raidID].buf_queue);
371
372 raidrootdev[raidID].dv_class = DV_DISK;
373 raidrootdev[raidID].dv_cfdata = NULL;
374 raidrootdev[raidID].dv_unit = raidID;
375 raidrootdev[raidID].dv_parent = NULL;
376 raidrootdev[raidID].dv_flags = 0;
377 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
378
379 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
380 (RF_Raid_t *));
381 if (raidPtrs[raidID] == NULL) {
382 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
383 numraid = raidID;
384 return;
385 }
386 }
387
388 #ifdef RAID_AUTOCONFIG
389 raidautoconfig = 1;
390 #endif
391
392 if (raidautoconfig) {
393 /* 1. locate all RAID components on the system */
394
395 #if DEBUG
396 printf("Searching for raid components...\n");
397 #endif
398 ac_list = rf_find_raid_components();
399
400 /* 2. sort them into their respective sets */
401
402 config_sets = rf_create_auto_sets(ac_list);
403
404 /* 3. evaluate each set and configure the valid ones
405 This gets done in rf_buildroothack() */
406
407 /* schedule the creation of the thread to do the
408 "/ on RAID" stuff */
409
410 kthread_create(rf_buildroothack,config_sets);
411
412 #if 0
413 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
414 #endif
415 }
416
417 }
418
419 void
420 rf_buildroothack(arg)
421 void *arg;
422 {
423 RF_ConfigSet_t *config_sets = arg;
424 RF_ConfigSet_t *cset;
425 RF_ConfigSet_t *next_cset;
426 int retcode;
427 int raidID;
428 int rootID;
429 int num_root;
430
431 rootID = 0;
432 num_root = 0;
433 cset = config_sets;
434 while(cset != NULL ) {
435 next_cset = cset->next;
436 if (rf_have_enough_components(cset) &&
437 cset->ac->clabel->autoconfigure==1) {
438 retcode = rf_auto_config_set(cset,&raidID);
439 if (!retcode) {
440 if (cset->rootable) {
441 rootID = raidID;
442 num_root++;
443 }
444 } else {
445 /* The autoconfig didn't work :( */
446 #if DEBUG
447 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
448 #endif
449 rf_release_all_vps(cset);
450 }
451 } else {
452 /* we're not autoconfiguring this set...
453 release the associated resources */
454 rf_release_all_vps(cset);
455 }
456 /* cleanup */
457 rf_cleanup_config_set(cset);
458 cset = next_cset;
459 }
460 if (boothowto & RB_ASKNAME) {
461 /* We don't auto-config... */
462 } else {
463 /* They didn't ask, and we found something bootable... */
464
465 if (num_root == 1) {
466 booted_device = &raidrootdev[rootID];
467 } else if (num_root > 1) {
468 /* we can't guess.. require the user to answer... */
469 boothowto |= RB_ASKNAME;
470 }
471 }
472 }
473
474
475 int
476 raidsize(dev)
477 dev_t dev;
478 {
479 struct raid_softc *rs;
480 struct disklabel *lp;
481 int part, unit, omask, size;
482
483 unit = raidunit(dev);
484 if (unit >= numraid)
485 return (-1);
486 rs = &raid_softc[unit];
487
488 if ((rs->sc_flags & RAIDF_INITED) == 0)
489 return (-1);
490
491 part = DISKPART(dev);
492 omask = rs->sc_dkdev.dk_openmask & (1 << part);
493 lp = rs->sc_dkdev.dk_label;
494
495 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
496 return (-1);
497
498 if (lp->d_partitions[part].p_fstype != FS_SWAP)
499 size = -1;
500 else
501 size = lp->d_partitions[part].p_size *
502 (lp->d_secsize / DEV_BSIZE);
503
504 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
505 return (-1);
506
507 return (size);
508
509 }
510
511 int
512 raiddump(dev, blkno, va, size)
513 dev_t dev;
514 daddr_t blkno;
515 caddr_t va;
516 size_t size;
517 {
518 /* Not implemented. */
519 return ENXIO;
520 }
521 /* ARGSUSED */
522 int
523 raidopen(dev, flags, fmt, p)
524 dev_t dev;
525 int flags, fmt;
526 struct proc *p;
527 {
528 int unit = raidunit(dev);
529 struct raid_softc *rs;
530 struct disklabel *lp;
531 int part, pmask;
532 int error = 0;
533
534 if (unit >= numraid)
535 return (ENXIO);
536 rs = &raid_softc[unit];
537
538 if ((error = raidlock(rs)) != 0)
539 return (error);
540 lp = rs->sc_dkdev.dk_label;
541
542 part = DISKPART(dev);
543 pmask = (1 << part);
544
545 db1_printf(("Opening raid device number: %d partition: %d\n",
546 unit, part));
547
548
549 if ((rs->sc_flags & RAIDF_INITED) &&
550 (rs->sc_dkdev.dk_openmask == 0))
551 raidgetdisklabel(dev);
552
553 /* make sure that this partition exists */
554
555 if (part != RAW_PART) {
556 db1_printf(("Not a raw partition..\n"));
557 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
558 ((part >= lp->d_npartitions) ||
559 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
560 error = ENXIO;
561 raidunlock(rs);
562 db1_printf(("Bailing out...\n"));
563 return (error);
564 }
565 }
566 /* Prevent this unit from being unconfigured while open. */
567 switch (fmt) {
568 case S_IFCHR:
569 rs->sc_dkdev.dk_copenmask |= pmask;
570 break;
571
572 case S_IFBLK:
573 rs->sc_dkdev.dk_bopenmask |= pmask;
574 break;
575 }
576
577 if ((rs->sc_dkdev.dk_openmask == 0) &&
578 ((rs->sc_flags & RAIDF_INITED) != 0)) {
579 /* First one... mark things as dirty... Note that we *MUST*
580 have done a configure before this. I DO NOT WANT TO BE
581 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
582 THAT THEY BELONG TOGETHER!!!!! */
583 /* XXX should check to see if we're only open for reading
584 here... If so, we needn't do this, but then need some
585 other way of keeping track of what's happened.. */
586
587 rf_markalldirty( raidPtrs[unit] );
588 }
589
590
591 rs->sc_dkdev.dk_openmask =
592 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
593
594 raidunlock(rs);
595
596 return (error);
597
598
599 }
600 /* ARGSUSED */
601 int
602 raidclose(dev, flags, fmt, p)
603 dev_t dev;
604 int flags, fmt;
605 struct proc *p;
606 {
607 int unit = raidunit(dev);
608 struct raid_softc *rs;
609 int error = 0;
610 int part;
611
612 if (unit >= numraid)
613 return (ENXIO);
614 rs = &raid_softc[unit];
615
616 if ((error = raidlock(rs)) != 0)
617 return (error);
618
619 part = DISKPART(dev);
620
621 /* ...that much closer to allowing unconfiguration... */
622 switch (fmt) {
623 case S_IFCHR:
624 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
625 break;
626
627 case S_IFBLK:
628 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
629 break;
630 }
631 rs->sc_dkdev.dk_openmask =
632 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
633
634 if ((rs->sc_dkdev.dk_openmask == 0) &&
635 ((rs->sc_flags & RAIDF_INITED) != 0)) {
636 /* Last one... device is not unconfigured yet.
637    Device shutdown has taken care of setting the
638    clean bits if RAIDF_INITED is not set;
639    otherwise, mark things as clean here... */
640 #if 0
641 printf("Last one on raid%d. Updating status.\n",unit);
642 #endif
643 rf_update_component_labels(raidPtrs[unit],
644 RF_FINAL_COMPONENT_UPDATE);
645 if (doing_shutdown) {
646 /* last one, and we're going down, so
647 lights out for this RAID set too. */
648 error = rf_Shutdown(raidPtrs[unit]);
649
650 /* It's no longer initialized... */
651 rs->sc_flags &= ~RAIDF_INITED;
652
653 /* Detach the disk. */
654 disk_detach(&rs->sc_dkdev);
655 }
656 }
657
658 raidunlock(rs);
659 return (0);
660
661 }
662
663 void
664 raidstrategy(bp)
665 struct buf *bp;
666 {
667 int s;
668
669 unsigned int raidID = raidunit(bp->b_dev);
670 RF_Raid_t *raidPtr;
671 struct raid_softc *rs = &raid_softc[raidID];
672 struct disklabel *lp;
673 int wlabel;
674
675 if ((rs->sc_flags & RAIDF_INITED) ==0) {
676 bp->b_error = ENXIO;
677 bp->b_flags |= B_ERROR;
678 bp->b_resid = bp->b_bcount;
679 biodone(bp);
680 return;
681 }
682 if (raidID >= numraid || !raidPtrs[raidID]) {
683 bp->b_error = ENODEV;
684 bp->b_flags |= B_ERROR;
685 bp->b_resid = bp->b_bcount;
686 biodone(bp);
687 return;
688 }
689 raidPtr = raidPtrs[raidID];
690 if (!raidPtr->valid) {
691 bp->b_error = ENODEV;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 if (bp->b_bcount == 0) {
698 db1_printf(("b_bcount is zero..\n"));
699 biodone(bp);
700 return;
701 }
702 lp = rs->sc_dkdev.dk_label;
703
704 /*
705 * Do bounds checking and adjust transfer. If there's an
706 * error, the bounds check will flag that for us.
707 */
708
709 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
710 if (DISKPART(bp->b_dev) != RAW_PART)
711 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
712 db1_printf(("Bounds check failed!!:%d %d\n",
713 (int) bp->b_blkno, (int) wlabel));
714 biodone(bp);
715 return;
716 }
717 s = splbio();
718
719 bp->b_resid = 0;
720
721 /* stuff it onto our queue */
722 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
723
724 raidstart(raidPtrs[raidID]);
725
726 splx(s);
727 }
728 /* ARGSUSED */
729 int
730 raidread(dev, uio, flags)
731 dev_t dev;
732 struct uio *uio;
733 int flags;
734 {
735 int unit = raidunit(dev);
736 struct raid_softc *rs;
737 int part;
738
739 if (unit >= numraid)
740 return (ENXIO);
741 rs = &raid_softc[unit];
742
743 if ((rs->sc_flags & RAIDF_INITED) == 0)
744 return (ENXIO);
745 part = DISKPART(dev);
746
747 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
748
749 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
750
751 }
752 /* ARGSUSED */
753 int
754 raidwrite(dev, uio, flags)
755 dev_t dev;
756 struct uio *uio;
757 int flags;
758 {
759 int unit = raidunit(dev);
760 struct raid_softc *rs;
761
762 if (unit >= numraid)
763 return (ENXIO);
764 rs = &raid_softc[unit];
765
766 if ((rs->sc_flags & RAIDF_INITED) == 0)
767 return (ENXIO);
768 db1_printf(("raidwrite\n"));
769 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
770
771 }
772
773 int
774 raidioctl(dev, cmd, data, flag, p)
775 dev_t dev;
776 u_long cmd;
777 caddr_t data;
778 int flag;
779 struct proc *p;
780 {
781 int unit = raidunit(dev);
782 int error = 0;
783 int part, pmask;
784 struct raid_softc *rs;
785 RF_Config_t *k_cfg, *u_cfg;
786 RF_Raid_t *raidPtr;
787 RF_RaidDisk_t *diskPtr;
788 RF_AccTotals_t *totals;
789 RF_DeviceConfig_t *d_cfg, **ucfgp;
790 u_char *specific_buf;
791 int retcode = 0;
792 int row;
793 int column;
794 struct rf_recon_req *rrcopy, *rr;
795 RF_ComponentLabel_t *clabel;
796 RF_ComponentLabel_t ci_label;
797 RF_ComponentLabel_t **clabel_ptr;
798 RF_SingleComponent_t *sparePtr,*componentPtr;
799 RF_SingleComponent_t hot_spare;
800 RF_SingleComponent_t component;
801 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
802 int i, j, d;
803 #ifdef __HAVE_OLD_DISKLABEL
804 struct disklabel newlabel;
805 #endif
806
807 if (unit >= numraid)
808 return (ENXIO);
809 rs = &raid_softc[unit];
810 raidPtr = raidPtrs[unit];
811
812 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
813 (int) DISKPART(dev), (int) unit, (int) cmd));
814
815 /* Must be open for writes for these commands... */
816 switch (cmd) {
817 case DIOCSDINFO:
818 case DIOCWDINFO:
819 #ifdef __HAVE_OLD_DISKLABEL
820 case ODIOCWDINFO:
821 case ODIOCSDINFO:
822 #endif
823 case DIOCWLABEL:
824 if ((flag & FWRITE) == 0)
825 return (EBADF);
826 }
827
828 /* Must be initialized for these... */
829 switch (cmd) {
830 case DIOCGDINFO:
831 case DIOCSDINFO:
832 case DIOCWDINFO:
833 #ifdef __HAVE_OLD_DISKLABEL
834 case ODIOCGDINFO:
835 case ODIOCWDINFO:
836 case ODIOCSDINFO:
837 case ODIOCGDEFLABEL:
838 #endif
839 case DIOCGPART:
840 case DIOCWLABEL:
841 case DIOCGDEFLABEL:
842 case RAIDFRAME_SHUTDOWN:
843 case RAIDFRAME_REWRITEPARITY:
844 case RAIDFRAME_GET_INFO:
845 case RAIDFRAME_RESET_ACCTOTALS:
846 case RAIDFRAME_GET_ACCTOTALS:
847 case RAIDFRAME_KEEP_ACCTOTALS:
848 case RAIDFRAME_GET_SIZE:
849 case RAIDFRAME_FAIL_DISK:
850 case RAIDFRAME_COPYBACK:
851 case RAIDFRAME_CHECK_RECON_STATUS:
852 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
853 case RAIDFRAME_GET_COMPONENT_LABEL:
854 case RAIDFRAME_SET_COMPONENT_LABEL:
855 case RAIDFRAME_ADD_HOT_SPARE:
856 case RAIDFRAME_REMOVE_HOT_SPARE:
857 case RAIDFRAME_INIT_LABELS:
858 case RAIDFRAME_REBUILD_IN_PLACE:
859 case RAIDFRAME_CHECK_PARITY:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
864 case RAIDFRAME_SET_AUTOCONFIG:
865 case RAIDFRAME_SET_ROOT:
866 case RAIDFRAME_DELETE_COMPONENT:
867 case RAIDFRAME_INCORPORATE_HOT_SPARE:
868 if ((rs->sc_flags & RAIDF_INITED) == 0)
869 return (ENXIO);
870 }
871
872 switch (cmd) {
873
874 /* configure the system */
875 case RAIDFRAME_CONFIGURE:
876
877 if (raidPtr->valid) {
878 /* There is a valid RAID set running on this unit! */
879 printf("raid%d: Device already configured!\n",unit);
880 return(EINVAL);
881 }
882
883 /* copy-in the configuration information */
884 /* data points to a pointer to the configuration structure */
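/*
 * That is, the caller passes the address of a pointer to its
 * RF_Config_t, so "data" here is really an RF_Config_t **.  A minimal
 * userland sketch of the calling convention ("fd" and "cfg" are
 * hypothetical and must be set up by the caller, as raidctl(8) does):
 *
 *	RF_Config_t cfg, *cfgp = &cfg;
 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
 */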
885
886 u_cfg = *((RF_Config_t **) data);
887 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
888 if (k_cfg == NULL) {
889 return (ENOMEM);
890 }
891 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
892 sizeof(RF_Config_t));
893 if (retcode) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
896 retcode));
897 return (retcode);
898 }
899 /* allocate a buffer for the layout-specific data, and copy it
900 * in */
901 if (k_cfg->layoutSpecificSize) {
902 if (k_cfg->layoutSpecificSize > 10000) {
903 /* sanity check */
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (EINVAL);
906 }
907 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
908 (u_char *));
909 if (specific_buf == NULL) {
910 RF_Free(k_cfg, sizeof(RF_Config_t));
911 return (ENOMEM);
912 }
913 retcode = copyin(k_cfg->layoutSpecific,
914 (caddr_t) specific_buf,
915 k_cfg->layoutSpecificSize);
916 if (retcode) {
917 RF_Free(k_cfg, sizeof(RF_Config_t));
918 RF_Free(specific_buf,
919 k_cfg->layoutSpecificSize);
920 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
921 retcode));
922 return (retcode);
923 }
924 } else
925 specific_buf = NULL;
926 k_cfg->layoutSpecific = specific_buf;
927
928 /* should do some kind of sanity check on the configuration.
929 * Store the sum of all the bytes in the last byte? */
930
931 /* configure the system */
932
933 /*
934 * Clear the entire RAID descriptor, just to make sure
935 * there is no stale data left in the case of a
936 * reconfiguration
937 */
938 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
939 raidPtr->raidid = unit;
940
941 retcode = rf_Configure(raidPtr, k_cfg, NULL);
942
943 if (retcode == 0) {
944
945 /* allow this many simultaneous IO's to
946 this RAID device */
947 raidPtr->openings = RAIDOUTSTANDING;
948
949 raidinit(raidPtr);
950 rf_markalldirty(raidPtr);
951 }
952 /* free the buffers. No return code here. */
953 if (k_cfg->layoutSpecificSize) {
954 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
955 }
956 RF_Free(k_cfg, sizeof(RF_Config_t));
957
958 return (retcode);
959
960 /* shutdown the system */
961 case RAIDFRAME_SHUTDOWN:
962
963 if ((error = raidlock(rs)) != 0)
964 return (error);
965
966 /*
967 * If somebody has a partition mounted, we shouldn't
968 * shutdown.
969 */
970
971 part = DISKPART(dev);
972 pmask = (1 << part);
973 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
974 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
975 (rs->sc_dkdev.dk_copenmask & pmask))) {
976 raidunlock(rs);
977 return (EBUSY);
978 }
979
980 retcode = rf_Shutdown(raidPtr);
981
982 /* It's no longer initialized... */
983 rs->sc_flags &= ~RAIDF_INITED;
984
985 /* Detach the disk. */
986 disk_detach(&rs->sc_dkdev);
987
988 raidunlock(rs);
989
990 return (retcode);
991 case RAIDFRAME_GET_COMPONENT_LABEL:
992 clabel_ptr = (RF_ComponentLabel_t **) data;
993 /* need to read the component label for the disk indicated
994 by row,column in clabel */
995
996 /* For practice, let's get it directly from disk, rather
997 than from the in-core copy */
998 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
999 (RF_ComponentLabel_t *));
1000 if (clabel == NULL)
1001 return (ENOMEM);
1002
1003 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1004
1005 retcode = copyin( *clabel_ptr, clabel,
1006 sizeof(RF_ComponentLabel_t));
1007
1008 if (retcode) {
1009 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1010 return(retcode);
1011 }
1012
1013 row = clabel->row;
1014 column = clabel->column;
1015
1016 if ((row < 0) || (row >= raidPtr->numRow) ||
1017 (column < 0) || (column >= raidPtr->numCol +
1018 raidPtr->numSpare)) {
1019 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1020 return(EINVAL);
1021 }
1022
1023 raidread_component_label(raidPtr->Disks[row][column].dev,
1024 raidPtr->raid_cinfo[row][column].ci_vp,
1025 clabel );
1026
1027 retcode = copyout((caddr_t) clabel,
1028 (caddr_t) *clabel_ptr,
1029 sizeof(RF_ComponentLabel_t));
1030 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1031 return (retcode);
1032
1033 case RAIDFRAME_SET_COMPONENT_LABEL:
1034 clabel = (RF_ComponentLabel_t *) data;
1035
1036 /* XXX check the label for valid stuff... */
1037 /* Note that some things *should not* get modified --
1038 the user should be re-initing the labels instead of
1039 trying to patch things.
1040 */
1041
1042 printf("Got component label:\n");
1043 printf("Version: %d\n",clabel->version);
1044 printf("Serial Number: %d\n",clabel->serial_number);
1045 printf("Mod counter: %d\n",clabel->mod_counter);
1046 printf("Row: %d\n", clabel->row);
1047 printf("Column: %d\n", clabel->column);
1048 printf("Num Rows: %d\n", clabel->num_rows);
1049 printf("Num Columns: %d\n", clabel->num_columns);
1050 printf("Clean: %d\n", clabel->clean);
1051 printf("Status: %d\n", clabel->status);
1052
1053 row = clabel->row;
1054 column = clabel->column;
1055
1056 if ((row < 0) || (row >= raidPtr->numRow) ||
1057 (column < 0) || (column >= raidPtr->numCol)) {
1058 return(EINVAL);
1059 }
1060
1061 /* XXX this isn't allowed to do anything for now :-) */
1062
1063 /* XXX and before it is, we need to fill in the rest
1064 of the fields!?!?!?! */
1065 #if 0
1066 raidwrite_component_label(
1067 raidPtr->Disks[row][column].dev,
1068 raidPtr->raid_cinfo[row][column].ci_vp,
1069 clabel );
1070 #endif
1071 return (0);
1072
1073 case RAIDFRAME_INIT_LABELS:
1074 clabel = (RF_ComponentLabel_t *) data;
1075 /*
1076 we only want the serial number from
1077 the above. We get all the rest of the information
1078 from the config that was used to create this RAID
1079 set.
1080 */
1081
1082 raidPtr->serial_number = clabel->serial_number;
1083
1084 raid_init_component_label(raidPtr, &ci_label);
1085 ci_label.serial_number = clabel->serial_number;
1086
1087 for(row=0;row<raidPtr->numRow;row++) {
1088 ci_label.row = row;
1089 for(column=0;column<raidPtr->numCol;column++) {
1090 diskPtr = &raidPtr->Disks[row][column];
1091 if (!RF_DEAD_DISK(diskPtr->status)) {
1092 ci_label.partitionSize = diskPtr->partitionSize;
1093 ci_label.column = column;
1094 raidwrite_component_label(
1095 raidPtr->Disks[row][column].dev,
1096 raidPtr->raid_cinfo[row][column].ci_vp,
1097 &ci_label );
1098 }
1099 }
1100 }
1101
1102 return (retcode);
1103 case RAIDFRAME_SET_AUTOCONFIG:
1104 d = rf_set_autoconfig(raidPtr, *(int *) data);
1105 printf("New autoconfig value is: %d\n", d);
1106 *(int *) data = d;
1107 return (retcode);
1108
1109 case RAIDFRAME_SET_ROOT:
1110 d = rf_set_rootpartition(raidPtr, *(int *) data);
1111 printf("New rootpartition value is: %d\n", d);
1112 *(int *) data = d;
1113 return (retcode);
1114
1115 /* initialize all parity */
1116 case RAIDFRAME_REWRITEPARITY:
1117
1118 if (raidPtr->Layout.map->faultsTolerated == 0) {
1119 /* Parity for RAID 0 is trivially correct */
1120 raidPtr->parity_good = RF_RAID_CLEAN;
1121 return(0);
1122 }
1123
1124 if (raidPtr->parity_rewrite_in_progress == 1) {
1125 /* Re-write is already in progress! */
1126 return(EINVAL);
1127 }
1128
1129 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1130 rf_RewriteParityThread,
1131 raidPtr,"raid_parity");
1132 return (retcode);
1133
1134
1135 case RAIDFRAME_ADD_HOT_SPARE:
1136 sparePtr = (RF_SingleComponent_t *) data;
1137 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1138 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1139 return(retcode);
1140
1141 case RAIDFRAME_REMOVE_HOT_SPARE:
1142 return(retcode);
1143
1144 case RAIDFRAME_DELETE_COMPONENT:
1145 componentPtr = (RF_SingleComponent_t *)data;
1146 memcpy( &component, componentPtr,
1147 sizeof(RF_SingleComponent_t));
1148 retcode = rf_delete_component(raidPtr, &component);
1149 return(retcode);
1150
1151 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1152 componentPtr = (RF_SingleComponent_t *)data;
1153 memcpy( &component, componentPtr,
1154 sizeof(RF_SingleComponent_t));
1155 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1156 return(retcode);
1157
1158 case RAIDFRAME_REBUILD_IN_PLACE:
1159
1160 if (raidPtr->Layout.map->faultsTolerated == 0) {
1161 /* Can't do this on a RAID 0!! */
1162 return(EINVAL);
1163 }
1164
1165 if (raidPtr->recon_in_progress == 1) {
1166 /* a reconstruct is already in progress! */
1167 return(EINVAL);
1168 }
1169
1170 componentPtr = (RF_SingleComponent_t *) data;
1171 memcpy( &component, componentPtr,
1172 sizeof(RF_SingleComponent_t));
1173 row = component.row;
1174 column = component.column;
1175 printf("Rebuild: %d %d\n",row, column);
1176 if ((row < 0) || (row >= raidPtr->numRow) ||
1177 (column < 0) || (column >= raidPtr->numCol)) {
1178 return(EINVAL);
1179 }
1180
1181 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1182 if (rrcopy == NULL)
1183 return(ENOMEM);
1184
1185 rrcopy->raidPtr = (void *) raidPtr;
1186 rrcopy->row = row;
1187 rrcopy->col = column;
1188
1189 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1190 rf_ReconstructInPlaceThread,
1191 rrcopy,"raid_reconip");
1192 return(retcode);
1193
1194 case RAIDFRAME_GET_INFO:
1195 if (!raidPtr->valid)
1196 return (ENODEV);
1197 ucfgp = (RF_DeviceConfig_t **) data;
1198 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1199 (RF_DeviceConfig_t *));
1200 if (d_cfg == NULL)
1201 return (ENOMEM);
1202 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1203 d_cfg->rows = raidPtr->numRow;
1204 d_cfg->cols = raidPtr->numCol;
1205 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1206 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1207 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1208 return (ENOMEM);
1209 }
1210 d_cfg->nspares = raidPtr->numSpare;
1211 if (d_cfg->nspares >= RF_MAX_DISKS) {
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213 return (ENOMEM);
1214 }
1215 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1216 d = 0;
1217 for (i = 0; i < d_cfg->rows; i++) {
1218 for (j = 0; j < d_cfg->cols; j++) {
1219 d_cfg->devs[d] = raidPtr->Disks[i][j];
1220 d++;
1221 }
1222 }
1223 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1224 d_cfg->spares[i] = raidPtr->Disks[0][j];
1225 }
1226 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1227 sizeof(RF_DeviceConfig_t));
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229
1230 return (retcode);
1231
1232 case RAIDFRAME_CHECK_PARITY:
1233 *(int *) data = raidPtr->parity_good;
1234 return (0);
1235
1236 case RAIDFRAME_RESET_ACCTOTALS:
1237 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1238 return (0);
1239
1240 case RAIDFRAME_GET_ACCTOTALS:
1241 totals = (RF_AccTotals_t *) data;
1242 *totals = raidPtr->acc_totals;
1243 return (0);
1244
1245 case RAIDFRAME_KEEP_ACCTOTALS:
1246 raidPtr->keep_acc_totals = *(int *)data;
1247 return (0);
1248
1249 case RAIDFRAME_GET_SIZE:
1250 *(int *) data = raidPtr->totalSectors;
1251 return (0);
1252
1253 /* fail a disk & optionally start reconstruction */
1254 case RAIDFRAME_FAIL_DISK:
1255
1256 if (raidPtr->Layout.map->faultsTolerated == 0) {
1257 /* Can't do this on a RAID 0!! */
1258 return(EINVAL);
1259 }
1260
1261 rr = (struct rf_recon_req *) data;
1262
1263 if (rr->row < 0 || rr->row >= raidPtr->numRow
1264 || rr->col < 0 || rr->col >= raidPtr->numCol)
1265 return (EINVAL);
1266
1267 printf("raid%d: Failing the disk: row: %d col: %d\n",
1268 unit, rr->row, rr->col);
1269
1270 /* make a copy of the recon request so that we don't rely on
1271 * the user's buffer */
1272 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1273 if (rrcopy == NULL)
1274 return(ENOMEM);
1275 memcpy(rrcopy, rr, sizeof(*rr));
1276 rrcopy->raidPtr = (void *) raidPtr;
1277
1278 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1279 rf_ReconThread,
1280 rrcopy,"raid_recon");
1281 return (0);
1282
1283 /* invoke a copyback operation after recon on whatever disk
1284 * needs it, if any */
1285 case RAIDFRAME_COPYBACK:
1286
1287 if (raidPtr->Layout.map->faultsTolerated == 0) {
1288 /* This makes no sense on a RAID 0!! */
1289 return(EINVAL);
1290 }
1291
1292 if (raidPtr->copyback_in_progress == 1) {
1293 /* Copyback is already in progress! */
1294 return(EINVAL);
1295 }
1296
1297 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1298 rf_CopybackThread,
1299 raidPtr,"raid_copyback");
1300 return (retcode);
1301
1302 /* return the percentage completion of reconstruction */
1303 case RAIDFRAME_CHECK_RECON_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0, so tell the
1306 user it's done. */
1307 *(int *) data = 100;
1308 return(0);
1309 }
1310 row = 0; /* XXX we only consider a single row... */
1311 if (raidPtr->status[row] != rf_rs_reconstructing)
1312 *(int *) data = 100;
1313 else
1314 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1315 return (0);
1316 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1317 progressInfoPtr = (RF_ProgressInfo_t **) data;
1318 row = 0; /* XXX we only consider a single row... */
1319 if (raidPtr->status[row] != rf_rs_reconstructing) {
1320 progressInfo.remaining = 0;
1321 progressInfo.completed = 100;
1322 progressInfo.total = 100;
1323 } else {
1324 progressInfo.total =
1325 raidPtr->reconControl[row]->numRUsTotal;
1326 progressInfo.completed =
1327 raidPtr->reconControl[row]->numRUsComplete;
1328 progressInfo.remaining = progressInfo.total -
1329 progressInfo.completed;
1330 }
1331 retcode = copyout((caddr_t) &progressInfo,
1332 (caddr_t) *progressInfoPtr,
1333 sizeof(RF_ProgressInfo_t));
1334 return (retcode);
1335
1336 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1337 if (raidPtr->Layout.map->faultsTolerated == 0) {
1338 /* This makes no sense on a RAID 0, so tell the
1339 user it's done. */
1340 *(int *) data = 100;
1341 return(0);
1342 }
1343 if (raidPtr->parity_rewrite_in_progress == 1) {
1344 *(int *) data = 100 *
1345 raidPtr->parity_rewrite_stripes_done /
1346 raidPtr->Layout.numStripe;
1347 } else {
1348 *(int *) data = 100;
1349 }
1350 return (0);
1351
1352 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1353 progressInfoPtr = (RF_ProgressInfo_t **) data;
1354 if (raidPtr->parity_rewrite_in_progress == 1) {
1355 progressInfo.total = raidPtr->Layout.numStripe;
1356 progressInfo.completed =
1357 raidPtr->parity_rewrite_stripes_done;
1358 progressInfo.remaining = progressInfo.total -
1359 progressInfo.completed;
1360 } else {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 }
1365 retcode = copyout((caddr_t) &progressInfo,
1366 (caddr_t) *progressInfoPtr,
1367 sizeof(RF_ProgressInfo_t));
1368 return (retcode);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1371 if (raidPtr->Layout.map->faultsTolerated == 0) {
1372 /* This makes no sense on a RAID 0 */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->copyback_in_progress == 1) {
1377 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1378 raidPtr->Layout.numStripe;
1379 } else {
1380 *(int *) data = 100;
1381 }
1382 return (0);
1383
1384 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1385 progressInfoPtr = (RF_ProgressInfo_t **) data;
1386 if (raidPtr->copyback_in_progress == 1) {
1387 progressInfo.total = raidPtr->Layout.numStripe;
1388 progressInfo.completed =
1389 raidPtr->copyback_stripes_done;
1390 progressInfo.remaining = progressInfo.total -
1391 progressInfo.completed;
1392 } else {
1393 progressInfo.remaining = 0;
1394 progressInfo.completed = 100;
1395 progressInfo.total = 100;
1396 }
1397 retcode = copyout((caddr_t) &progressInfo,
1398 (caddr_t) *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 /* the sparetable daemon calls this to wait for the kernel to
1403 * need a spare table. this ioctl does not return until a
1404 * spare table is needed. XXX -- calling mpsleep here in the
1405 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1406 * -- I should either compute the spare table in the kernel,
1407 * or have a different -- XXX XXX -- interface (a different
1408 * character device) for delivering the table -- XXX */
1409 #if 0
1410 case RAIDFRAME_SPARET_WAIT:
1411 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1412 while (!rf_sparet_wait_queue)
1413 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1414 waitreq = rf_sparet_wait_queue;
1415 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1416 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1417
1418 /* structure assignment */
1419 *((RF_SparetWait_t *) data) = *waitreq;
1420
1421 RF_Free(waitreq, sizeof(*waitreq));
1422 return (0);
1423
1424 /* wakes up a process waiting on SPARET_WAIT and puts an error
1425 * code in it that will cause the daemon to exit */
1426 case RAIDFRAME_ABORT_SPARET_WAIT:
1427 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1428 waitreq->fcol = -1;
1429 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1430 waitreq->next = rf_sparet_wait_queue;
1431 rf_sparet_wait_queue = waitreq;
1432 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1433 wakeup(&rf_sparet_wait_queue);
1434 return (0);
1435
1436 /* used by the spare table daemon to deliver a spare table
1437 * into the kernel */
1438 case RAIDFRAME_SEND_SPARET:
1439
1440 /* install the spare table */
1441 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1442
1443 /* respond to the requestor. the return status of the spare
1444 * table installation is passed in the "fcol" field */
1445 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1446 waitreq->fcol = retcode;
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 waitreq->next = rf_sparet_resp_queue;
1449 rf_sparet_resp_queue = waitreq;
1450 wakeup(&rf_sparet_resp_queue);
1451 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1452
1453 return (retcode);
1454 #endif
1455
1456 default:
1457 break; /* fall through to the os-specific code below */
1458
1459 }
1460
1461 if (!raidPtr->valid)
1462 return (EINVAL);
1463
1464 /*
1465 * Add support for "regular" device ioctls here.
1466 */
1467
1468 switch (cmd) {
1469 case DIOCGDINFO:
1470 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1471 break;
1472 #ifdef __HAVE_OLD_DISKLABEL
1473 case ODIOCGDINFO:
1474 newlabel = *(rs->sc_dkdev.dk_label);
1475 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1476 return ENOTTY;
1477 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1478 break;
1479 #endif
1480
1481 case DIOCGPART:
1482 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1483 ((struct partinfo *) data)->part =
1484 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1485 break;
1486
1487 case DIOCWDINFO:
1488 case DIOCSDINFO:
1489 #ifdef __HAVE_OLD_DISKLABEL
1490 case ODIOCWDINFO:
1491 case ODIOCSDINFO:
1492 #endif
1493 {
1494 struct disklabel *lp;
1495 #ifdef __HAVE_OLD_DISKLABEL
1496 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1497 memset(&newlabel, 0, sizeof newlabel);
1498 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1499 lp = &newlabel;
1500 } else
1501 #endif
1502 lp = (struct disklabel *)data;
1503
1504 if ((error = raidlock(rs)) != 0)
1505 return (error);
1506
1507 rs->sc_flags |= RAIDF_LABELLING;
1508
1509 error = setdisklabel(rs->sc_dkdev.dk_label,
1510 lp, 0, rs->sc_dkdev.dk_cpulabel);
1511 if (error == 0) {
1512 if (cmd == DIOCWDINFO
1513 #ifdef __HAVE_OLD_DISKLABEL
1514 || cmd == ODIOCWDINFO
1515 #endif
1516 )
1517 error = writedisklabel(RAIDLABELDEV(dev),
1518 raidstrategy, rs->sc_dkdev.dk_label,
1519 rs->sc_dkdev.dk_cpulabel);
1520 }
1521 rs->sc_flags &= ~RAIDF_LABELLING;
1522
1523 raidunlock(rs);
1524
1525 if (error)
1526 return (error);
1527 break;
1528 }
1529
1530 case DIOCWLABEL:
1531 if (*(int *) data != 0)
1532 rs->sc_flags |= RAIDF_WLABEL;
1533 else
1534 rs->sc_flags &= ~RAIDF_WLABEL;
1535 break;
1536
1537 case DIOCGDEFLABEL:
1538 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1539 break;
1540
1541 #ifdef __HAVE_OLD_DISKLABEL
1542 case ODIOCGDEFLABEL:
1543 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1544 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1545 return ENOTTY;
1546 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1547 break;
1548 #endif
1549
1550 default:
1551 retcode = ENOTTY;
1552 }
1553 return (retcode);
1554
1555 }
1556
1557
1558 /* raidinit -- complete the rest of the initialization for the
1559 RAIDframe device. */
1560
1561
1562 static void
1563 raidinit(raidPtr)
1564 RF_Raid_t *raidPtr;
1565 {
1566 struct raid_softc *rs;
1567 int unit;
1568
1569 unit = raidPtr->raidid;
1570
1571 rs = &raid_softc[unit];
1572
1573 /* XXX should check return code first... */
1574 rs->sc_flags |= RAIDF_INITED;
1575
1576 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1577
1578 rs->sc_dkdev.dk_name = rs->sc_xname;
1579
1580 /* disk_attach actually creates space for the CPU disklabel, among
1581 * other things, so it's critical to call this *BEFORE* we try putzing
1582 * with disklabels. */
1583
1584 disk_attach(&rs->sc_dkdev);
1585
1586 /* XXX There may be a weird interaction here between this, and
1587 * protectedSectors, as used in RAIDframe. */
1588
1589 rs->sc_size = raidPtr->totalSectors;
1590
1591 }
1592
1593 /* wake up the daemon & tell it to get us a spare table
1594 * XXX
1595 * the entries in the queues should be tagged with the raidPtr
1596 * so that in the extremely rare case that two recons happen at once,
1597 * we know for which device we're requesting a spare table
1598 * XXX
1599 *
1600 * XXX This code is not currently used. GO
1601 */
1602 int
1603 rf_GetSpareTableFromDaemon(req)
1604 RF_SparetWait_t *req;
1605 {
1606 int retcode;
1607
1608 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1609 req->next = rf_sparet_wait_queue;
1610 rf_sparet_wait_queue = req;
1611 wakeup(&rf_sparet_wait_queue);
1612
1613 /* mpsleep unlocks the mutex */
1614 while (!rf_sparet_resp_queue) {
1615 tsleep(&rf_sparet_resp_queue, PRIBIO,
1616 "raidframe getsparetable", 0);
1617 }
1618 req = rf_sparet_resp_queue;
1619 rf_sparet_resp_queue = req->next;
1620 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1621
1622 retcode = req->fcol;
1623 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1624 * alloc'd */
1625 return (retcode);
1626 }
1627
1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1629 * bp & passes it down.
1630 * any calls originating in the kernel must use non-blocking I/O.
1631 * We also do some extra sanity checking to return "appropriate" error
1632 * values for certain conditions (to make some standard utilities work).
1633 *
1634 * Formerly known as: rf_DoAccessKernel
1635 */
1636 void
1637 raidstart(raidPtr)
1638 RF_Raid_t *raidPtr;
1639 {
1640 RF_SectorCount_t num_blocks, pb, sum;
1641 RF_RaidAddr_t raid_addr;
1642 int retcode;
1643 struct partition *pp;
1644 daddr_t blocknum;
1645 int unit;
1646 struct raid_softc *rs;
1647 int do_async;
1648 struct buf *bp;
1649
1650 unit = raidPtr->raidid;
1651 rs = &raid_softc[unit];
1652
1653 /* quick check to see if anything has died recently */
1654 RF_LOCK_MUTEX(raidPtr->mutex);
1655 if (raidPtr->numNewFailures > 0) {
1656 rf_update_component_labels(raidPtr,
1657 RF_NORMAL_COMPONENT_UPDATE);
1658 raidPtr->numNewFailures--;
1659 }
1660 RF_UNLOCK_MUTEX(raidPtr->mutex);
1661
1662 /* Check to see if we're at the limit... */
1663 RF_LOCK_MUTEX(raidPtr->mutex);
1664 while (raidPtr->openings > 0) {
1665 RF_UNLOCK_MUTEX(raidPtr->mutex);
1666
1667 /* get the next item, if any, from the queue */
1668 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1669 /* nothing more to do */
1670 return;
1671 }
1672 BUFQ_REMOVE(&rs->buf_queue, bp);
1673
1674 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1675 * partition.. Need to make it absolute to the underlying
1676 * device.. */
1677
1678 blocknum = bp->b_blkno;
1679 if (DISKPART(bp->b_dev) != RAW_PART) {
1680 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1681 blocknum += pp->p_offset;
1682 }
1683
1684 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1685 (int) blocknum));
1686
1687 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1688 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1689
1690 /* *THIS* is where we adjust what block we're going to...
1691 * but DO NOT TOUCH bp->b_blkno!!! */
1692 raid_addr = blocknum;
1693
1694 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1695 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1696 sum = raid_addr + num_blocks + pb;
1697 if (1 || rf_debugKernelAccess) {
1698 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1699 (int) raid_addr, (int) sum, (int) num_blocks,
1700 (int) pb, (int) bp->b_resid));
1701 }
1702 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1703 || (sum < num_blocks) || (sum < pb)) {
1704 bp->b_error = ENOSPC;
1705 bp->b_flags |= B_ERROR;
1706 bp->b_resid = bp->b_bcount;
1707 biodone(bp);
1708 RF_LOCK_MUTEX(raidPtr->mutex);
1709 continue;
1710 }
1711 /*
1712 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1713 */
1714
1715 if (bp->b_bcount & raidPtr->sectorMask) {
1716 bp->b_error = EINVAL;
1717 bp->b_flags |= B_ERROR;
1718 bp->b_resid = bp->b_bcount;
1719 biodone(bp);
1720 RF_LOCK_MUTEX(raidPtr->mutex);
1721 continue;
1722
1723 }
1724 db1_printf(("Calling DoAccess..\n"));
1725
1726
1727 RF_LOCK_MUTEX(raidPtr->mutex);
1728 raidPtr->openings--;
1729 RF_UNLOCK_MUTEX(raidPtr->mutex);
1730
1731 /*
1732 * Everything is async.
1733 */
1734 do_async = 1;
1735
1736 disk_busy(&rs->sc_dkdev);
1737
1738 /* XXX we're still at splbio() here... do we *really*
1739 need to be? */
1740
1741 /* don't ever condition on bp->b_flags & B_WRITE.
1742 * always condition on B_READ instead */
1743
1744 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1745 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1746 do_async, raid_addr, num_blocks,
1747 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1748
1749 RF_LOCK_MUTEX(raidPtr->mutex);
1750 }
1751 RF_UNLOCK_MUTEX(raidPtr->mutex);
1752 }
1753
1754
1755
1756
1757 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1758
1759 int
1760 rf_DispatchKernelIO(queue, req)
1761 RF_DiskQueue_t *queue;
1762 RF_DiskQueueData_t *req;
1763 {
1764 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1765 struct buf *bp;
1766 struct raidbuf *raidbp = NULL;
1767 struct raid_softc *rs;
1768 int unit;
1769 int s;
1770
1771 s=0;
1772 /* s = splbio();*/ /* want to test this */
1773 /* XXX along with the vnode, we also need the softc associated with
1774 * this device.. */
1775
1776 req->queue = queue;
1777
1778 unit = queue->raidPtr->raidid;
1779
1780 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1781
1782 if (unit >= numraid) {
1783 printf("Invalid unit number: %d %d\n", unit, numraid);
1784 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1785 }
1786 rs = &raid_softc[unit];
1787
1788 bp = req->bp;
1789 #if 1
1790 /* XXX when there is a physical disk failure, someone is passing us a
1791 * buffer that contains old stuff!! Attempt to deal with this problem
1792 * without taking a performance hit... (not sure where the real bug
1793 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1794
1795 if (bp->b_flags & B_ERROR) {
1796 bp->b_flags &= ~B_ERROR;
1797 }
1798 if (bp->b_error != 0) {
1799 bp->b_error = 0;
1800 }
1801 #endif
1802 raidbp = RAIDGETBUF(rs);
1803
1804 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1805
1806 /*
1807 * context for raidiodone
1808 */
1809 raidbp->rf_obp = bp;
1810 raidbp->req = req;
1811
1812 LIST_INIT(&raidbp->rf_buf.b_dep);
1813
1814 switch (req->type) {
1815 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1816 /* XXX need to do something extra here.. */
1817 /* I'm leaving this in, as I've never actually seen it used,
1818 * and I'd like folks to report it... GO */
1819 printf("WAKEUP CALLED\n");
1820 queue->numOutstanding++;
1821
1822 /* XXX need to glue the original buffer into this?? */
1823
1824 KernelWakeupFunc(&raidbp->rf_buf);
1825 break;
1826
1827 case RF_IO_TYPE_READ:
1828 case RF_IO_TYPE_WRITE:
1829
1830 if (req->tracerec) {
1831 RF_ETIMER_START(req->tracerec->timer);
1832 }
1833 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1834 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1835 req->sectorOffset, req->numSector,
1836 req->buf, KernelWakeupFunc, (void *) req,
1837 queue->raidPtr->logBytesPerSector, req->b_proc);
1838
1839 if (rf_debugKernelAccess) {
1840 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1841 (long) bp->b_blkno));
1842 }
1843 queue->numOutstanding++;
1844 queue->last_deq_sector = req->sectorOffset;
1845 /* acc wouldn't have been let in if there were any pending
1846 * reqs at any other priority */
1847 queue->curPriority = req->priority;
1848
1849 db1_printf(("Going for %c to unit %d row %d col %d\n",
1850 req->type, unit, queue->row, queue->col));
1851 db1_printf(("sector %d count %d (%d bytes) %d\n",
1852 (int) req->sectorOffset, (int) req->numSector,
1853 (int) (req->numSector <<
1854 queue->raidPtr->logBytesPerSector),
1855 (int) queue->raidPtr->logBytesPerSector));
1856 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1857 raidbp->rf_buf.b_vp->v_numoutput++;
1858 }
1859 VOP_STRATEGY(&raidbp->rf_buf);
1860
1861 break;
1862
1863 default:
1864 panic("bad req->type in rf_DispatchKernelIO");
1865 }
1866 db1_printf(("Exiting from DispatchKernelIO\n"));
1867 /* splx(s); */ /* want to test this */
1868 return (0);
1869 }
1870 /* this is the callback function associated with an I/O invoked from
1871 kernel code.
1872 */
1873 static void
1874 KernelWakeupFunc(vbp)
1875 struct buf *vbp;
1876 {
1877 RF_DiskQueueData_t *req = NULL;
1878 RF_DiskQueue_t *queue;
1879 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1880 struct buf *bp;
1881 struct raid_softc *rs;
1882 int unit;
1883 int s;
1884
1885 s = splbio();
1886 db1_printf(("recovering the request queue:\n"));
1887 req = raidbp->req;
1888
1889 bp = raidbp->rf_obp;
1890
1891 queue = (RF_DiskQueue_t *) req->queue;
1892
1893 if (raidbp->rf_buf.b_flags & B_ERROR) {
1894 bp->b_flags |= B_ERROR;
1895 bp->b_error = raidbp->rf_buf.b_error ?
1896 raidbp->rf_buf.b_error : EIO;
1897 }
1898
1899 /* XXX methinks this could be wrong... */
1900 #if 1
1901 bp->b_resid = raidbp->rf_buf.b_resid;
1902 #endif
1903
1904 if (req->tracerec) {
1905 RF_ETIMER_STOP(req->tracerec->timer);
1906 RF_ETIMER_EVAL(req->tracerec->timer);
1907 RF_LOCK_MUTEX(rf_tracing_mutex);
1908 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1909 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1910 req->tracerec->num_phys_ios++;
1911 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1912 }
1913 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1914
1915 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1916
1917
1918 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1919 * ballistic, and mark the component as hosed... */
1920
1921 if (bp->b_flags & B_ERROR) {
1922 /* Mark the disk as dead */
1923 /* but only mark it once... */
1924 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1925 rf_ds_optimal) {
1926 printf("raid%d: IO Error. Marking %s as failed.\n",
1927 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1928 queue->raidPtr->Disks[queue->row][queue->col].status =
1929 rf_ds_failed;
1930 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1931 queue->raidPtr->numFailures++;
1932 queue->raidPtr->numNewFailures++;
1933 } else { /* Disk is already dead... */
1934 /* printf("Disk already marked as dead!\n"); */
1935 }
1936
1937 }
1938
1939 rs = &raid_softc[unit];
1940 RAIDPUTBUF(rs, raidbp);
1941
1942 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1943 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1944
1945 splx(s);
1946 }
1947
1948
1949
1950 /*
1951 * initialize a buf structure for doing an I/O in the kernel.
1952 */
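/*
 * Note (based on the buffer-cache conventions this code relies on): the
 * buffer is marked B_CALL, so biodone() invokes the supplied callback
 * (KernelWakeupFunc above) rather than waking a sleeper, and b_resid is
 * preset to the full transfer size for the underlying driver to update.
 */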
1953 static void
1954 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1955 logBytesPerSector, b_proc)
1956 struct buf *bp;
1957 struct vnode *b_vp;
1958 unsigned rw_flag;
1959 dev_t dev;
1960 RF_SectorNum_t startSect;
1961 RF_SectorCount_t numSect;
1962 caddr_t buf;
1963 void (*cbFunc) (struct buf *);
1964 void *cbArg;
1965 int logBytesPerSector;
1966 struct proc *b_proc;
1967 {
1968 /* bp->b_flags = B_PHYS | rw_flag; */
1969 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1970 bp->b_bcount = numSect << logBytesPerSector;
1971 bp->b_bufsize = bp->b_bcount;
1972 bp->b_error = 0;
1973 bp->b_dev = dev;
1974 bp->b_data = buf;
1975 bp->b_blkno = startSect;
1976 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1977 if (bp->b_bcount == 0) {
1978 panic("bp->b_bcount is zero in InitBP!!\n");
1979 }
1980 bp->b_proc = b_proc;
1981 bp->b_iodone = cbFunc;
1982 bp->b_vp = b_vp;
1983
1984 }
1985
1986 static void
1987 raidgetdefaultlabel(raidPtr, rs, lp)
1988 RF_Raid_t *raidPtr;
1989 struct raid_softc *rs;
1990 struct disklabel *lp;
1991 {
1992 db1_printf(("Building a default label...\n"));
1993 memset(lp, 0, sizeof(*lp));
1994
1995 /* fabricate a label... */
1996 lp->d_secperunit = raidPtr->totalSectors;
1997 lp->d_secsize = raidPtr->bytesPerSector;
1998 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1999 lp->d_ntracks = 4 * raidPtr->numCol;
2000 lp->d_ncylinders = raidPtr->totalSectors /
2001 (lp->d_nsectors * lp->d_ntracks);
2002 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2003
2004 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2005 lp->d_type = DTYPE_RAID;
2006 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2007 lp->d_rpm = 3600;
2008 lp->d_interleave = 1;
2009 lp->d_flags = 0;
2010
2011 lp->d_partitions[RAW_PART].p_offset = 0;
2012 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2013 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2014 lp->d_npartitions = RAW_PART + 1;
2015
2016 lp->d_magic = DISKMAGIC;
2017 lp->d_magic2 = DISKMAGIC;
2018 	lp->d_checksum = dkcksum(lp);
2019
2020 }
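/*
 * Worked example with purely hypothetical numbers: for a set with
 * totalSectors = 4194304, numCol = 4 and dataSectorsPerStripe = 96, the
 * fabricated geometry above comes out as nsectors = 96, ntracks = 16,
 * secpercyl = 1536 and ncylinders = 4194304 / 1536 = 2730 (integer
 * division); the raw partition simply spans all 4194304 sectors.
 */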
2021 /*
2022 * Read the disklabel from the raid device. If one is not present, fake one
2023 * up.
2024 */
2025 static void
2026 raidgetdisklabel(dev)
2027 dev_t dev;
2028 {
2029 int unit = raidunit(dev);
2030 struct raid_softc *rs = &raid_softc[unit];
2031 char *errstring;
2032 struct disklabel *lp = rs->sc_dkdev.dk_label;
2033 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2034 RF_Raid_t *raidPtr;
2035
2036 db1_printf(("Getting the disklabel...\n"));
2037
2038 memset(clp, 0, sizeof(*clp));
2039
2040 raidPtr = raidPtrs[unit];
2041
2042 raidgetdefaultlabel(raidPtr, rs, lp);
2043
2044 /*
2045 * Call the generic disklabel extraction routine.
2046 */
2047 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2048 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2049 if (errstring)
2050 raidmakedisklabel(rs);
2051 else {
2052 int i;
2053 struct partition *pp;
2054
2055 /*
2056 * Sanity check whether the found disklabel is valid.
2057 *
2058 		 * This is necessary since the total size of the raid device
2059 		 * may vary when the interleave is changed even though exactly
2060 		 * the same components are used, and an old disklabel may be
2061 		 * used if one is found.
2062 */
2063 if (lp->d_secperunit != rs->sc_size)
2064 printf("WARNING: %s: "
2065 			    "total number of sectors in disklabel (%d) != "
2066 "the size of raid (%ld)\n", rs->sc_xname,
2067 lp->d_secperunit, (long) rs->sc_size);
2068 for (i = 0; i < lp->d_npartitions; i++) {
2069 pp = &lp->d_partitions[i];
2070 if (pp->p_offset + pp->p_size > rs->sc_size)
2071 printf("WARNING: %s: end of partition `%c' "
2072 "exceeds the size of raid (%ld)\n",
2073 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2074 }
2075 }
2076
2077 }
2078 /*
2079 * Take care of things one might want to take care of in the event
2080 * that a disklabel isn't present.
2081 */
2082 static void
2083 raidmakedisklabel(rs)
2084 struct raid_softc *rs;
2085 {
2086 struct disklabel *lp = rs->sc_dkdev.dk_label;
2087 db1_printf(("Making a label..\n"));
2088
2089 /*
2090 * For historical reasons, if there's no disklabel present
2091 * the raw partition must be marked FS_BSDFFS.
2092 */
2093
2094 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2095
2096 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2097
2098 lp->d_checksum = dkcksum(lp);
2099 }
2100 /*
2101 * Lookup the provided name in the filesystem. If the file exists,
2102 * is a valid block device, and isn't being used by anyone else,
2103 * set *vpp to the file's vnode.
2104 * You'll find the original of this in ccd.c
2105 */
2106 int
2107 raidlookup(path, p, vpp)
2108 char *path;
2109 struct proc *p;
2110 struct vnode **vpp; /* result */
2111 {
2112 struct nameidata nd;
2113 struct vnode *vp;
2114 struct vattr va;
2115 int error;
2116
2117 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2118 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2119 #ifdef DEBUG
2120 printf("RAIDframe: vn_open returned %d\n", error);
2121 #endif
2122 return (error);
2123 }
2124 vp = nd.ni_vp;
2125 if (vp->v_usecount > 1) {
2126 VOP_UNLOCK(vp, 0);
2127 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2128 return (EBUSY);
2129 }
2130 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2131 VOP_UNLOCK(vp, 0);
2132 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2133 return (error);
2134 }
2135 /* XXX: eventually we should handle VREG, too. */
2136 if (va.va_type != VBLK) {
2137 VOP_UNLOCK(vp, 0);
2138 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2139 return (ENOTBLK);
2140 }
2141 VOP_UNLOCK(vp, 0);
2142 *vpp = vp;
2143 return (0);
2144 }
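/*
 * Usage note (a sketch of the typical caller, not an exhaustive list):
 * the configuration path hands each component pathname supplied from
 * userland to raidlookup() to turn it into an open vnode.  EBUSY comes
 * back if someone else already has the device open (v_usecount > 1),
 * and ENOTBLK if the path does not name a block device.
 */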
2145 /*
2146 * Wait interruptibly for an exclusive lock.
2147 *
2148 * XXX
2149 * Several drivers do this; it should be abstracted and made MP-safe.
2150 * (Hmm... where have we seen this warning before :-> GO )
2151 */
2152 static int
2153 raidlock(rs)
2154 struct raid_softc *rs;
2155 {
2156 int error;
2157
2158 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2159 rs->sc_flags |= RAIDF_WANTED;
2160 if ((error =
2161 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2162 return (error);
2163 }
2164 rs->sc_flags |= RAIDF_LOCKED;
2165 return (0);
2166 }
2167 /*
2168 * Unlock and wake up any waiters.
2169 */
2170 static void
2171 raidunlock(rs)
2172 struct raid_softc *rs;
2173 {
2174
2175 rs->sc_flags &= ~RAIDF_LOCKED;
2176 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2177 rs->sc_flags &= ~RAIDF_WANTED;
2178 wakeup(rs);
2179 }
2180 }
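/*
 * A minimal usage sketch (hypothetical caller) for the pair above:
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... modify the softc while holding the exclusive lock ...
 *	raidunlock(rs);
 *
 * The tsleep() in raidlock() uses PCATCH, so the lock attempt can fail
 * and the caller must be prepared to bail out without the lock held.
 */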
2181
2182
2183 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2184 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
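/*
 * The component label lives in a small reserved area near the front of
 * each component: RF_COMPONENT_INFO_OFFSET bytes in, and at most
 * RF_COMPONENT_INFO_SIZE bytes long.  The routines below convert the
 * byte offset to a block number as RF_COMPONENT_INFO_OFFSET / DEV_BSIZE
 * and transfer a whole RF_COMPONENT_INFO_SIZE buffer, of which only
 * sizeof(RF_ComponentLabel_t) is meaningful.
 */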
2185
2186 int
2187 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2188 {
2189 RF_ComponentLabel_t clabel;
2190 raidread_component_label(dev, b_vp, &clabel);
2191 clabel.mod_counter = mod_counter;
2192 clabel.clean = RF_RAID_CLEAN;
2193 raidwrite_component_label(dev, b_vp, &clabel);
2194 return(0);
2195 }
2196
2197
2198 int
2199 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2200 {
2201 RF_ComponentLabel_t clabel;
2202 raidread_component_label(dev, b_vp, &clabel);
2203 clabel.mod_counter = mod_counter;
2204 clabel.clean = RF_RAID_DIRTY;
2205 raidwrite_component_label(dev, b_vp, &clabel);
2206 return(0);
2207 }
2208
2209 /* ARGSUSED */
2210 int
2211 raidread_component_label(dev, b_vp, clabel)
2212 dev_t dev;
2213 struct vnode *b_vp;
2214 RF_ComponentLabel_t *clabel;
2215 {
2216 struct buf *bp;
2217 int error;
2218
2219 /* XXX should probably ensure that we don't try to do this if
2220 someone has changed rf_protected_sectors. */
2221
2222 if (b_vp == NULL) {
2223 /* For whatever reason, this component is not valid.
2224 Don't try to read a component label from it. */
2225 return(EINVAL);
2226 }
2227
2228 /* get a block of the appropriate size... */
2229 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2230 bp->b_dev = dev;
2231
2232 /* get our ducks in a row for the read */
2233 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2234 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2235 bp->b_flags |= B_READ;
2236 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2237
2238 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2239
2240 error = biowait(bp);
2241
2242 if (!error) {
2243 memcpy(clabel, bp->b_data,
2244 sizeof(RF_ComponentLabel_t));
2245 #if 0
2246 rf_print_component_label( clabel );
2247 #endif
2248 } else {
2249 #if 0
2250 printf("Failed to read RAID component label!\n");
2251 #endif
2252 }
2253
2254 brelse(bp);
2255 return(error);
2256 }
2257 /* ARGSUSED */
2258 int
2259 raidwrite_component_label(dev, b_vp, clabel)
2260 dev_t dev;
2261 struct vnode *b_vp;
2262 RF_ComponentLabel_t *clabel;
2263 {
2264 struct buf *bp;
2265 int error;
2266
2267 /* get a block of the appropriate size... */
2268 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2269 bp->b_dev = dev;
2270
2271 /* get our ducks in a row for the write */
2272 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2273 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2274 bp->b_flags |= B_WRITE;
2275 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2276
2277 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2278
2279 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2280
2281 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2282 error = biowait(bp);
2283 brelse(bp);
2284 if (error) {
2285 #if 1
2286 printf("Failed to write RAID component info!\n");
2287 #endif
2288 }
2289
2290 return(error);
2291 }
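/*
 * Both label routines above do their I/O synchronously and outside the
 * buffer cache: geteblk() supplies a scratch buffer, the component's
 * block strategy routine is called directly, and biowait()/brelse()
 * finish the job.  Errors are returned to the caller (and, for writes,
 * printed) but not retried here.
 */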
2292
2293 void
2294 rf_markalldirty(raidPtr)
2295 RF_Raid_t *raidPtr;
2296 {
2297 RF_ComponentLabel_t clabel;
2298 int r,c;
2299
2300 raidPtr->mod_counter++;
2301 for (r = 0; r < raidPtr->numRow; r++) {
2302 for (c = 0; c < raidPtr->numCol; c++) {
2303 /* we don't want to touch (at all) a disk that has
2304 failed */
2305 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2306 raidread_component_label(
2307 raidPtr->Disks[r][c].dev,
2308 raidPtr->raid_cinfo[r][c].ci_vp,
2309 &clabel);
2310 if (clabel.status == rf_ds_spared) {
2311 /* XXX do something special...
2312 but whatever you do, don't
2313 try to access it!! */
2314 } else {
2315 #if 0
2316 clabel.status =
2317 raidPtr->Disks[r][c].status;
2318 raidwrite_component_label(
2319 raidPtr->Disks[r][c].dev,
2320 raidPtr->raid_cinfo[r][c].ci_vp,
2321 &clabel);
2322 #endif
2323 raidmarkdirty(
2324 raidPtr->Disks[r][c].dev,
2325 raidPtr->raid_cinfo[r][c].ci_vp,
2326 raidPtr->mod_counter);
2327 }
2328 }
2329 }
2330 }
2331 /* printf("Component labels marked dirty.\n"); */
2332 #if 0
2333 for( c = 0; c < raidPtr->numSpare ; c++) {
2334 sparecol = raidPtr->numCol + c;
2335 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2336 /*
2337
2338 XXX this is where we get fancy and map this spare
2339 				   into its correct spot in the array.
2340
2341 */
2342 /*
2343
2344 we claim this disk is "optimal" if it's
2345 rf_ds_used_spare, as that means it should be
2346 directly substitutable for the disk it replaced.
2347 We note that too...
2348
2349 */
2350
2351 for(i=0;i<raidPtr->numRow;i++) {
2352 for(j=0;j<raidPtr->numCol;j++) {
2353 if ((raidPtr->Disks[i][j].spareRow ==
2354 r) &&
2355 (raidPtr->Disks[i][j].spareCol ==
2356 sparecol)) {
2357 srow = r;
2358 scol = sparecol;
2359 break;
2360 }
2361 }
2362 }
2363
2364 raidread_component_label(
2365 raidPtr->Disks[r][sparecol].dev,
2366 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2367 &clabel);
2368 /* make sure status is noted */
2369 clabel.version = RF_COMPONENT_LABEL_VERSION;
2370 clabel.mod_counter = raidPtr->mod_counter;
2371 clabel.serial_number = raidPtr->serial_number;
2372 clabel.row = srow;
2373 clabel.column = scol;
2374 clabel.num_rows = raidPtr->numRow;
2375 clabel.num_columns = raidPtr->numCol;
2376 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2377 clabel.status = rf_ds_optimal;
2378 raidwrite_component_label(
2379 raidPtr->Disks[r][sparecol].dev,
2380 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2381 &clabel);
2382 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2383 				      raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2384 }
2385 }
2386
2387 #endif
2388 }
2389
2390
2391 void
2392 rf_update_component_labels(raidPtr, final)
2393 RF_Raid_t *raidPtr;
2394 int final;
2395 {
2396 RF_ComponentLabel_t clabel;
2397 int sparecol;
2398 int r,c;
2399 int i,j;
2400 int srow, scol;
2401
2402 srow = -1;
2403 scol = -1;
2404
2405 /* XXX should do extra checks to make sure things really are clean,
2406 rather than blindly setting the clean bit... */
2407
2408 raidPtr->mod_counter++;
2409
2410 for (r = 0; r < raidPtr->numRow; r++) {
2411 for (c = 0; c < raidPtr->numCol; c++) {
2412 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2413 raidread_component_label(
2414 raidPtr->Disks[r][c].dev,
2415 raidPtr->raid_cinfo[r][c].ci_vp,
2416 &clabel);
2417 /* make sure status is noted */
2418 clabel.status = rf_ds_optimal;
2419 /* bump the counter */
2420 clabel.mod_counter = raidPtr->mod_counter;
2421
2422 raidwrite_component_label(
2423 raidPtr->Disks[r][c].dev,
2424 raidPtr->raid_cinfo[r][c].ci_vp,
2425 &clabel);
2426 if (final == RF_FINAL_COMPONENT_UPDATE) {
2427 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2428 raidmarkclean(
2429 raidPtr->Disks[r][c].dev,
2430 raidPtr->raid_cinfo[r][c].ci_vp,
2431 raidPtr->mod_counter);
2432 }
2433 }
2434 }
2435 /* else we don't touch it.. */
2436 }
2437 }
2438
2439 for( c = 0; c < raidPtr->numSpare ; c++) {
2440 sparecol = raidPtr->numCol + c;
2441 /* Need to ensure that the reconstruct actually completed! */
2442 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2443 /*
2444
2445 we claim this disk is "optimal" if it's
2446 rf_ds_used_spare, as that means it should be
2447 directly substitutable for the disk it replaced.
2448 We note that too...
2449
2450 */
2451
2452 for(i=0;i<raidPtr->numRow;i++) {
2453 for(j=0;j<raidPtr->numCol;j++) {
2454 if ((raidPtr->Disks[i][j].spareRow ==
2455 0) &&
2456 (raidPtr->Disks[i][j].spareCol ==
2457 sparecol)) {
2458 srow = i;
2459 scol = j;
2460 break;
2461 }
2462 }
2463 }
2464
2465 /* XXX shouldn't *really* need this... */
2466 raidread_component_label(
2467 raidPtr->Disks[0][sparecol].dev,
2468 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2469 &clabel);
2470 /* make sure status is noted */
2471
2472 raid_init_component_label(raidPtr, &clabel);
2473
2474 clabel.mod_counter = raidPtr->mod_counter;
2475 clabel.row = srow;
2476 clabel.column = scol;
2477 clabel.status = rf_ds_optimal;
2478
2479 raidwrite_component_label(
2480 raidPtr->Disks[0][sparecol].dev,
2481 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2482 &clabel);
2483 if (final == RF_FINAL_COMPONENT_UPDATE) {
2484 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2485 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2486 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2487 raidPtr->mod_counter);
2488 }
2489 }
2490 }
2491 }
2492 /* printf("Component labels updated\n"); */
2493 }
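/*
 * Summary of the update above: mod_counter is bumped once per call and
 * written into the label of every component that is still rf_ds_optimal
 * and of every spare that is actually in use (rf_ds_used_spare).  A
 * spare's label is rebuilt with raid_init_component_label() and pointed
 * at the row/column it now stands in for.  The clean bit is only set
 * when this is the final update (RF_FINAL_COMPONENT_UPDATE) and parity
 * is known good (RF_RAID_CLEAN).
 */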
2494
2495 void
2496 rf_close_component(raidPtr, vp, auto_configured)
2497 RF_Raid_t *raidPtr;
2498 struct vnode *vp;
2499 int auto_configured;
2500 {
2501 struct proc *p;
2502
2503 p = raidPtr->engine_thread;
2504
2505 if (vp != NULL) {
2506 if (auto_configured == 1) {
2507 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2508 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2509 vput(vp);
2510
2511 } else {
2512 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2513 }
2514 } else {
2515 printf("vnode was NULL\n");
2516 }
2517 }
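/*
 * Note on the two close paths above: auto-configured components were
 * obtained with bdevvp()/VOP_OPEN() during the autoconfiguration scan
 * (see rf_find_raid_components() below), so they are released with
 * vn_lock()/VOP_CLOSE()/vput(); components opened through raidlookup()'s
 * vn_open() are released with vn_close() using the engine thread's
 * credentials.
 */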
2518
2519
2520 void
2521 rf_UnconfigureVnodes(raidPtr)
2522 RF_Raid_t *raidPtr;
2523 {
2524 int r,c;
2525 struct proc *p;
2526 struct vnode *vp;
2527 int acd;
2528
2529
2530 /* We take this opportunity to close the vnodes like we should.. */
2531
2532 p = raidPtr->engine_thread;
2533
2534 for (r = 0; r < raidPtr->numRow; r++) {
2535 for (c = 0; c < raidPtr->numCol; c++) {
2536 printf("Closing vnode for row: %d col: %d\n", r, c);
2537 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2538 acd = raidPtr->Disks[r][c].auto_configured;
2539 rf_close_component(raidPtr, vp, acd);
2540 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2541 raidPtr->Disks[r][c].auto_configured = 0;
2542 }
2543 }
2544 for (r = 0; r < raidPtr->numSpare; r++) {
2545 printf("Closing vnode for spare: %d\n", r);
2546 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2547 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2548 rf_close_component(raidPtr, vp, acd);
2549 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2550 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2551 }
2552 }
2553
2554
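/*
 * The thread bodies below (reconstruction, parity rewrite, copyback and
 * reconstruct-in-place) all share the same shape: raise the appropriate
 * *_in_progress flag in the RF_Raid_t, do the long-running work at
 * splbio(), clear the flag, and finish with kthread_exit(0) rather than
 * returning.  The parity-rewrite thread additionally wakes anyone
 * waiting in the shutdown path.
 */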
2555 void
2556 rf_ReconThread(req)
2557 struct rf_recon_req *req;
2558 {
2559 int s;
2560 RF_Raid_t *raidPtr;
2561
2562 s = splbio();
2563 raidPtr = (RF_Raid_t *) req->raidPtr;
2564 raidPtr->recon_in_progress = 1;
2565
2566 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2567 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2568
2569 /* XXX get rid of this! we don't need it at all.. */
2570 RF_Free(req, sizeof(*req));
2571
2572 raidPtr->recon_in_progress = 0;
2573 splx(s);
2574
2575 /* That's all... */
2576 kthread_exit(0); /* does not return */
2577 }
2578
2579 void
2580 rf_RewriteParityThread(raidPtr)
2581 RF_Raid_t *raidPtr;
2582 {
2583 int retcode;
2584 int s;
2585
2586 raidPtr->parity_rewrite_in_progress = 1;
2587 s = splbio();
2588 retcode = rf_RewriteParity(raidPtr);
2589 splx(s);
2590 if (retcode) {
2591 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2592 } else {
2593 /* set the clean bit! If we shutdown correctly,
2594 the clean bit on each component label will get
2595 set */
2596 raidPtr->parity_good = RF_RAID_CLEAN;
2597 }
2598 raidPtr->parity_rewrite_in_progress = 0;
2599
2600 /* Anyone waiting for us to stop? If so, inform them... */
2601 if (raidPtr->waitShutdown) {
2602 wakeup(&raidPtr->parity_rewrite_in_progress);
2603 }
2604
2605 /* That's all... */
2606 kthread_exit(0); /* does not return */
2607 }
2608
2609
2610 void
2611 rf_CopybackThread(raidPtr)
2612 RF_Raid_t *raidPtr;
2613 {
2614 int s;
2615
2616 raidPtr->copyback_in_progress = 1;
2617 s = splbio();
2618 rf_CopybackReconstructedData(raidPtr);
2619 splx(s);
2620 raidPtr->copyback_in_progress = 0;
2621
2622 /* That's all... */
2623 kthread_exit(0); /* does not return */
2624 }
2625
2626
2627 void
2628 rf_ReconstructInPlaceThread(req)
2629 struct rf_recon_req *req;
2630 {
2631 int retcode;
2632 int s;
2633 RF_Raid_t *raidPtr;
2634
2635 s = splbio();
2636 raidPtr = req->raidPtr;
2637 raidPtr->recon_in_progress = 1;
2638 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2639 RF_Free(req, sizeof(*req));
2640 raidPtr->recon_in_progress = 0;
2641 splx(s);
2642
2643 /* That's all... */
2644 kthread_exit(0); /* does not return */
2645 }
2646
2647 void
2648 rf_mountroot_hook(dev)
2649 struct device *dev;
2650 {
2651
2652 }
2653
2654
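/*
 * rf_find_raid_components() walks every device on the system (alldevs),
 * skips anything that is not a disk (and the fd/hdfd floppy drivers),
 * opens the raw partition long enough to fetch its disklabel, and then,
 * for each partition of type FS_RAID, reads the component label.
 * Labels that pass rf_reasonable_label() and fit within the partition
 * are collected onto the RF_AutoConfig_t list that is returned;
 * everything else is closed and freed again.
 */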
2655 RF_AutoConfig_t *
2656 rf_find_raid_components()
2657 {
2658 struct devnametobdevmaj *dtobdm;
2659 struct vnode *vp;
2660 struct disklabel label;
2661 struct device *dv;
2662 char *cd_name;
2663 dev_t dev;
2664 int error;
2665 int i;
2666 int good_one;
2667 RF_ComponentLabel_t *clabel;
2668 RF_AutoConfig_t *ac_list;
2669 RF_AutoConfig_t *ac;
2670
2671
2672 /* initialize the AutoConfig list */
2673 ac_list = NULL;
2674
2675 /* we begin by trolling through *all* the devices on the system */
2676
2677 for (dv = alldevs.tqh_first; dv != NULL;
2678 dv = dv->dv_list.tqe_next) {
2679
2680 /* we are only interested in disks... */
2681 if (dv->dv_class != DV_DISK)
2682 continue;
2683
2684 /* we don't care about floppies... */
2685 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2686 continue;
2687 }
2688 /* hdfd is the Atari/Hades floppy driver */
2689 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2690 continue;
2691 }
2692
2693 /* need to find the device_name_to_block_device_major stuff */
2694 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2695 dtobdm = dev_name2blk;
2696 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2697 dtobdm++;
2698 }
2699
2700 /* get a vnode for the raw partition of this disk */
2701
2702 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2703 if (bdevvp(dev, &vp))
2704 panic("RAID can't alloc vnode");
2705
2706 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2707
2708 if (error) {
2709 /* "Who cares." Continue looking
2710 			   for something that exists */
2711 vput(vp);
2712 continue;
2713 }
2714
2715 /* Ok, the disk exists. Go get the disklabel. */
2716 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2717 FREAD, NOCRED, 0);
2718 if (error) {
2719 /*
2720 * XXX can't happen - open() would
2721 * have errored out (or faked up one)
2722 */
2723 printf("can't get label for dev %s%c (%d)!?!?\n",
2724 dv->dv_xname, 'a' + RAW_PART, error);
2725 }
2726
2727 /* don't need this any more. We'll allocate it again
2728 a little later if we really do... */
2729 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2730 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2731 vput(vp);
2732
2733 for (i=0; i < label.d_npartitions; i++) {
2734 /* We only support partitions marked as RAID */
2735 if (label.d_partitions[i].p_fstype != FS_RAID)
2736 continue;
2737
2738 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2739 if (bdevvp(dev, &vp))
2740 panic("RAID can't alloc vnode");
2741
2742 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2743 if (error) {
2744 /* Whatever... */
2745 vput(vp);
2746 continue;
2747 }
2748
2749 good_one = 0;
2750
2751 clabel = (RF_ComponentLabel_t *)
2752 malloc(sizeof(RF_ComponentLabel_t),
2753 M_RAIDFRAME, M_NOWAIT);
2754 if (clabel == NULL) {
2755 /* XXX CLEANUP HERE */
2756 printf("RAID auto config: out of memory!\n");
2757 return(NULL); /* XXX probably should panic? */
2758 }
2759
2760 if (!raidread_component_label(dev, vp, clabel)) {
2761 /* Got the label. Does it look reasonable? */
2762 if (rf_reasonable_label(clabel) &&
2763 (clabel->partitionSize <=
2764 label.d_partitions[i].p_size)) {
2765 #if DEBUG
2766 printf("Component on: %s%c: %d\n",
2767 dv->dv_xname, 'a'+i,
2768 label.d_partitions[i].p_size);
2769 rf_print_component_label(clabel);
2770 #endif
2771 /* if it's reasonable, add it,
2772 else ignore it. */
2773 ac = (RF_AutoConfig_t *)
2774 malloc(sizeof(RF_AutoConfig_t),
2775 M_RAIDFRAME,
2776 M_NOWAIT);
2777 if (ac == NULL) {
2778 /* XXX should panic?? */
2779 return(NULL);
2780 }
2781
2782 sprintf(ac->devname, "%s%c",
2783 dv->dv_xname, 'a'+i);
2784 ac->dev = dev;
2785 ac->vp = vp;
2786 ac->clabel = clabel;
2787 ac->next = ac_list;
2788 ac_list = ac;
2789 good_one = 1;
2790 }
2791 }
2792 if (!good_one) {
2793 /* cleanup */
2794 free(clabel, M_RAIDFRAME);
2795 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2796 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2797 vput(vp);
2798 }
2799 }
2800 }
2801 return(ac_list);
2802 }
2803
2804 static int
2805 rf_reasonable_label(clabel)
2806 RF_ComponentLabel_t *clabel;
2807 {
2808
2809 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2810 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2811 ((clabel->clean == RF_RAID_CLEAN) ||
2812 (clabel->clean == RF_RAID_DIRTY)) &&
2813 clabel->row >=0 &&
2814 clabel->column >= 0 &&
2815 clabel->num_rows > 0 &&
2816 clabel->num_columns > 0 &&
2817 clabel->row < clabel->num_rows &&
2818 clabel->column < clabel->num_columns &&
2819 clabel->blockSize > 0 &&
2820 clabel->numBlocks > 0) {
2821 /* label looks reasonable enough... */
2822 return(1);
2823 }
2824 return(0);
2825 }
2826
2827
2828 void
2829 rf_print_component_label(clabel)
2830 RF_ComponentLabel_t *clabel;
2831 {
2832 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2833 clabel->row, clabel->column,
2834 clabel->num_rows, clabel->num_columns);
2835 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2836 clabel->version, clabel->serial_number,
2837 clabel->mod_counter);
2838 printf(" Clean: %s Status: %d\n",
2839 clabel->clean ? "Yes" : "No", clabel->status );
2840 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2841 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2842 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2843 (char) clabel->parityConfig, clabel->blockSize,
2844 clabel->numBlocks);
2845 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2846 printf(" Contains root partition: %s\n",
2847 clabel->root_partition ? "Yes" : "No" );
2848 printf(" Last configured as: raid%d\n", clabel->last_unit );
2849 #if 0
2850 printf(" Config order: %d\n", clabel->config_order);
2851 #endif
2852
2853 }
2854
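/*
 * rf_create_auto_sets() partitions the flat autoconfig list into
 * configuration sets.  Each component is compared (via rf_does_it_fit())
 * against the first member of every existing set; if none matches, a
 * new set is started.  Components are relinked from the input list onto
 * the per-set lists (ac->next is rewritten), so the original chain
 * should not be walked afterwards.
 */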
2855 RF_ConfigSet_t *
2856 rf_create_auto_sets(ac_list)
2857 RF_AutoConfig_t *ac_list;
2858 {
2859 RF_AutoConfig_t *ac;
2860 RF_ConfigSet_t *config_sets;
2861 RF_ConfigSet_t *cset;
2862 RF_AutoConfig_t *ac_next;
2863
2864
2865 config_sets = NULL;
2866
2867 /* Go through the AutoConfig list, and figure out which components
2868 belong to what sets. */
2869 ac = ac_list;
2870 while(ac!=NULL) {
2871 /* we're going to putz with ac->next, so save it here
2872 for use at the end of the loop */
2873 ac_next = ac->next;
2874
2875 if (config_sets == NULL) {
2876 /* will need at least this one... */
2877 config_sets = (RF_ConfigSet_t *)
2878 malloc(sizeof(RF_ConfigSet_t),
2879 M_RAIDFRAME, M_NOWAIT);
2880 if (config_sets == NULL) {
2881 panic("rf_create_auto_sets: No memory!\n");
2882 }
2883 /* this one is easy :) */
2884 config_sets->ac = ac;
2885 config_sets->next = NULL;
2886 config_sets->rootable = 0;
2887 ac->next = NULL;
2888 } else {
2889 /* which set does this component fit into? */
2890 cset = config_sets;
2891 while(cset!=NULL) {
2892 if (rf_does_it_fit(cset, ac)) {
2893 /* looks like it matches... */
2894 ac->next = cset->ac;
2895 cset->ac = ac;
2896 break;
2897 }
2898 cset = cset->next;
2899 }
2900 if (cset==NULL) {
2901 /* didn't find a match above... new set..*/
2902 cset = (RF_ConfigSet_t *)
2903 malloc(sizeof(RF_ConfigSet_t),
2904 M_RAIDFRAME, M_NOWAIT);
2905 if (cset == NULL) {
2906 panic("rf_create_auto_sets: No memory!\n");
2907 }
2908 cset->ac = ac;
2909 ac->next = NULL;
2910 cset->next = config_sets;
2911 cset->rootable = 0;
2912 config_sets = cset;
2913 }
2914 }
2915 ac = ac_next;
2916 }
2917
2918
2919 return(config_sets);
2920 }
2921
2922 static int
2923 rf_does_it_fit(cset, ac)
2924 RF_ConfigSet_t *cset;
2925 RF_AutoConfig_t *ac;
2926 {
2927 RF_ComponentLabel_t *clabel1, *clabel2;
2928
2929 /* If this one matches the *first* one in the set, that's good
2930 enough, since the other members of the set would have been
2931 through here too... */
2932 /* note that we are not checking partitionSize here..
2933
2934 Note that we are also not checking the mod_counters here.
2935 	   If everything else matches except the mod_counter, that's
2936 good enough for this test. We will deal with the mod_counters
2937 a little later in the autoconfiguration process.
2938
2939 (clabel1->mod_counter == clabel2->mod_counter) &&
2940
2941 The reason we don't check for this is that failed disks
2942 will have lower modification counts. If those disks are
2943 not added to the set they used to belong to, then they will
2944 form their own set, which may result in 2 different sets,
2945 for example, competing to be configured at raid0, and
2946 perhaps competing to be the root filesystem set. If the
2947 wrong ones get configured, or both attempt to become /,
2948 	   weird behaviour and/or serious lossage will occur. Thus we
2949 need to bring them into the fold here, and kick them out at
2950 a later point.
2951
2952 */
2953
2954 clabel1 = cset->ac->clabel;
2955 clabel2 = ac->clabel;
2956 if ((clabel1->version == clabel2->version) &&
2957 (clabel1->serial_number == clabel2->serial_number) &&
2958 (clabel1->num_rows == clabel2->num_rows) &&
2959 (clabel1->num_columns == clabel2->num_columns) &&
2960 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2961 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2962 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2963 (clabel1->parityConfig == clabel2->parityConfig) &&
2964 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2965 (clabel1->blockSize == clabel2->blockSize) &&
2966 (clabel1->numBlocks == clabel2->numBlocks) &&
2967 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2968 (clabel1->root_partition == clabel2->root_partition) &&
2969 (clabel1->last_unit == clabel2->last_unit) &&
2970 (clabel1->config_order == clabel2->config_order)) {
2971 		/* if it gets here, it almost *has* to be a match */
2972 } else {
2973 /* it's not consistent with somebody in the set..
2974 punt */
2975 return(0);
2976 }
2977 /* all was fine.. it must fit... */
2978 return(1);
2979 }
2980
2981 int
2982 rf_have_enough_components(cset)
2983 RF_ConfigSet_t *cset;
2984 {
2985 RF_AutoConfig_t *ac;
2986 RF_AutoConfig_t *auto_config;
2987 RF_ComponentLabel_t *clabel;
2988 int r,c;
2989 int num_rows;
2990 int num_cols;
2991 int num_missing;
2992 int mod_counter;
2993 int mod_counter_found;
2994 int even_pair_failed;
2995 char parity_type;
2996
2997
2998 /* check to see that we have enough 'live' components
2999 of this set. If so, we can configure it if necessary */
3000
3001 num_rows = cset->ac->clabel->num_rows;
3002 num_cols = cset->ac->clabel->num_columns;
3003 parity_type = cset->ac->clabel->parityConfig;
3004
3005 /* XXX Check for duplicate components!?!?!? */
3006
3007 /* Determine what the mod_counter is supposed to be for this set. */
3008
3009 mod_counter_found = 0;
3010 mod_counter = 0;
3011 ac = cset->ac;
3012 while(ac!=NULL) {
3013 if (mod_counter_found==0) {
3014 mod_counter = ac->clabel->mod_counter;
3015 mod_counter_found = 1;
3016 } else {
3017 if (ac->clabel->mod_counter > mod_counter) {
3018 mod_counter = ac->clabel->mod_counter;
3019 }
3020 }
3021 ac = ac->next;
3022 }
3023
3024 num_missing = 0;
3025 auto_config = cset->ac;
3026
3027 for(r=0; r<num_rows; r++) {
3028 even_pair_failed = 0;
3029 for(c=0; c<num_cols; c++) {
3030 ac = auto_config;
3031 while(ac!=NULL) {
3032 if ((ac->clabel->row == r) &&
3033 (ac->clabel->column == c) &&
3034 (ac->clabel->mod_counter == mod_counter)) {
3035 /* it's this one... */
3036 #if DEBUG
3037 printf("Found: %s at %d,%d\n",
3038 ac->devname,r,c);
3039 #endif
3040 break;
3041 }
3042 ac=ac->next;
3043 }
3044 if (ac==NULL) {
3045 /* Didn't find one here! */
3046 /* special case for RAID 1, especially
3047 where there are more than 2
3048 components (where RAIDframe treats
3049 things a little differently :( ) */
3050 if (parity_type == '1') {
3051 if (c%2 == 0) { /* even component */
3052 even_pair_failed = 1;
3053 } else { /* odd component. If
3054 we're failed, and
3055 so is the even
3056 component, it's
3057 "Good Night, Charlie" */
3058 if (even_pair_failed == 1) {
3059 return(0);
3060 }
3061 }
3062 } else {
3063 /* normal accounting */
3064 num_missing++;
3065 }
3066 }
3067 if ((parity_type == '1') && (c%2 == 1)) {
3068 				/* Just finished the odd half of a mirror
3069 				   pair without bailing.. reset the
3070 				   even_pair_failed flag for the next pair. */
3071 even_pair_failed = 0;
3072 }
3073 }
3074 }
3075
3076 clabel = cset->ac->clabel;
3077
3078 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3079 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3080 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3081 /* XXX this needs to be made *much* more general */
3082 /* Too many failures */
3083 return(0);
3084 }
3085 /* otherwise, all is well, and we've got enough to take a kick
3086 at autoconfiguring this set */
3087 return(1);
3088 }
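/*
 * Failure tolerance applied above: RAID 0 ('0') configures only with no
 * components missing, RAID 4 ('4') and RAID 5 ('5') tolerate a single
 * missing component, and RAID 1 ('1') is handled pairwise, so a set
 * survives as long as no even/odd mirror pair has lost both halves.  As
 * the XXX notes, this wants to be made more general.
 */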
3089
3090 void
3091 rf_create_configuration(ac,config,raidPtr)
3092 RF_AutoConfig_t *ac;
3093 RF_Config_t *config;
3094 RF_Raid_t *raidPtr;
3095 {
3096 RF_ComponentLabel_t *clabel;
3097 int i;
3098
3099 clabel = ac->clabel;
3100
3101 /* 1. Fill in the common stuff */
3102 config->numRow = clabel->num_rows;
3103 config->numCol = clabel->num_columns;
3104 config->numSpare = 0; /* XXX should this be set here? */
3105 config->sectPerSU = clabel->sectPerSU;
3106 config->SUsPerPU = clabel->SUsPerPU;
3107 config->SUsPerRU = clabel->SUsPerRU;
3108 config->parityConfig = clabel->parityConfig;
3109 /* XXX... */
3110 strcpy(config->diskQueueType,"fifo");
3111 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3112 config->layoutSpecificSize = 0; /* XXX ?? */
3113
3114 while(ac!=NULL) {
3115 		/* row/col values will be in range due to the checks
3116 		   in rf_reasonable_label() */
3117 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3118 ac->devname);
3119 ac = ac->next;
3120 }
3121
3122 for(i=0;i<RF_MAXDBGV;i++) {
3123 		config->debugVars[i][0] = '\0';
3124 }
3125 }
3126
3127 int
3128 rf_set_autoconfig(raidPtr, new_value)
3129 RF_Raid_t *raidPtr;
3130 int new_value;
3131 {
3132 RF_ComponentLabel_t clabel;
3133 struct vnode *vp;
3134 dev_t dev;
3135 int row, column;
3136
3137 raidPtr->autoconfigure = new_value;
3138 for(row=0; row<raidPtr->numRow; row++) {
3139 for(column=0; column<raidPtr->numCol; column++) {
3140 if (raidPtr->Disks[row][column].status ==
3141 rf_ds_optimal) {
3142 dev = raidPtr->Disks[row][column].dev;
3143 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3144 raidread_component_label(dev, vp, &clabel);
3145 clabel.autoconfigure = new_value;
3146 raidwrite_component_label(dev, vp, &clabel);
3147 }
3148 }
3149 }
3150 return(new_value);
3151 }
3152
3153 int
3154 rf_set_rootpartition(raidPtr, new_value)
3155 RF_Raid_t *raidPtr;
3156 int new_value;
3157 {
3158 RF_ComponentLabel_t clabel;
3159 struct vnode *vp;
3160 dev_t dev;
3161 int row, column;
3162
3163 raidPtr->root_partition = new_value;
3164 for(row=0; row<raidPtr->numRow; row++) {
3165 for(column=0; column<raidPtr->numCol; column++) {
3166 if (raidPtr->Disks[row][column].status ==
3167 rf_ds_optimal) {
3168 dev = raidPtr->Disks[row][column].dev;
3169 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3170 raidread_component_label(dev, vp, &clabel);
3171 clabel.root_partition = new_value;
3172 raidwrite_component_label(dev, vp, &clabel);
3173 }
3174 }
3175 }
3176 return(new_value);
3177 }
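/*
 * rf_set_autoconfig() and rf_set_rootpartition() above follow the same
 * pattern: record the new value in the RF_Raid_t, then rewrite the
 * component label of every component that is currently rf_ds_optimal.
 * Components in any other state are skipped, so their labels keep the
 * old setting until some later label update.
 */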
3178
3179 void
3180 rf_release_all_vps(cset)
3181 RF_ConfigSet_t *cset;
3182 {
3183 RF_AutoConfig_t *ac;
3184
3185 ac = cset->ac;
3186 while(ac!=NULL) {
3187 /* Close the vp, and give it back */
3188 if (ac->vp) {
3189 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3190 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3191 vput(ac->vp);
3192 ac->vp = NULL;
3193 }
3194 ac = ac->next;
3195 }
3196 }
3197
3198
3199 void
3200 rf_cleanup_config_set(cset)
3201 RF_ConfigSet_t *cset;
3202 {
3203 RF_AutoConfig_t *ac;
3204 RF_AutoConfig_t *next_ac;
3205
3206 ac = cset->ac;
3207 while(ac!=NULL) {
3208 next_ac = ac->next;
3209 /* nuke the label */
3210 free(ac->clabel, M_RAIDFRAME);
3211 /* cleanup the config structure */
3212 free(ac, M_RAIDFRAME);
3213 /* "next.." */
3214 ac = next_ac;
3215 }
3216 /* and, finally, nuke the config set */
3217 free(cset, M_RAIDFRAME);
3218 }
3219
3220
3221 void
3222 raid_init_component_label(raidPtr, clabel)
3223 RF_Raid_t *raidPtr;
3224 RF_ComponentLabel_t *clabel;
3225 {
3226 /* current version number */
3227 clabel->version = RF_COMPONENT_LABEL_VERSION;
3228 clabel->serial_number = raidPtr->serial_number;
3229 clabel->mod_counter = raidPtr->mod_counter;
3230 clabel->num_rows = raidPtr->numRow;
3231 clabel->num_columns = raidPtr->numCol;
3232 clabel->clean = RF_RAID_DIRTY; /* not clean */
3233 clabel->status = rf_ds_optimal; /* "It's good!" */
3234
3235 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3236 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3237 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3238
3239 clabel->blockSize = raidPtr->bytesPerSector;
3240 clabel->numBlocks = raidPtr->sectorsPerDisk;
3241
3242 /* XXX not portable */
3243 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3244 clabel->maxOutstanding = raidPtr->maxOutstanding;
3245 clabel->autoconfigure = raidPtr->autoconfigure;
3246 clabel->root_partition = raidPtr->root_partition;
3247 clabel->last_unit = raidPtr->raidid;
3248 clabel->config_order = raidPtr->config_order;
3249 }
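/*
 * Callers typically refine the template filled in above: for example,
 * rf_update_component_labels() overrides mod_counter, row, column and
 * status for a freshly used spare before writing the label back out.
 */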
3250
3251 int
3252 rf_auto_config_set(cset,unit)
3253 RF_ConfigSet_t *cset;
3254 int *unit;
3255 {
3256 RF_Raid_t *raidPtr;
3257 RF_Config_t *config;
3258 int raidID;
3259 int retcode;
3260
3261 printf("RAID autoconfigure\n");
3262
3263 retcode = 0;
3264 *unit = -1;
3265
3266 /* 1. Create a config structure */
3267
3268 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3269 M_RAIDFRAME,
3270 M_NOWAIT);
3271 if (config==NULL) {
3272 printf("Out of mem!?!?\n");
3273 /* XXX do something more intelligent here. */
3274 return(1);
3275 }
3276
3277 memset(config, 0, sizeof(RF_Config_t));
3278
3279 /* XXX raidID needs to be set correctly.. */
3280
3281 /*
3282 2. Figure out what RAID ID this one is supposed to live at
3283 See if we can get the same RAID dev that it was configured
3284 on last time..
3285 */
3286
3287 raidID = cset->ac->clabel->last_unit;
3288 if ((raidID < 0) || (raidID >= numraid)) {
3289 /* let's not wander off into lala land. */
3290 raidID = numraid - 1;
3291 }
3292 if (raidPtrs[raidID]->valid != 0) {
3293
3294 /*
3295 Nope... Go looking for an alternative...
3296 Start high so we don't immediately use raid0 if that's
3297 not taken.
3298 */
3299
3300 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3301 if (raidPtrs[raidID]->valid == 0) {
3302 /* can use this one! */
3303 break;
3304 }
3305 }
3306 }
3307
3308 if (raidID < 0) {
3309 /* punt... */
3310 printf("Unable to auto configure this set!\n");
3311 printf("(Out of RAID devs!)\n");
3312 return(1);
3313 }
3314 printf("Configuring raid%d:\n",raidID);
3315 raidPtr = raidPtrs[raidID];
3316
3317 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3318 raidPtr->raidid = raidID;
3319 raidPtr->openings = RAIDOUTSTANDING;
3320
3321 /* 3. Build the configuration structure */
3322 rf_create_configuration(cset->ac, config, raidPtr);
3323
3324 /* 4. Do the configuration */
3325 retcode = rf_Configure(raidPtr, config, cset->ac);
3326
3327 if (retcode == 0) {
3328
3329 raidinit(raidPtrs[raidID]);
3330
3331 rf_markalldirty(raidPtrs[raidID]);
3332 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3333 if (cset->ac->clabel->root_partition==1) {
3334 /* everything configured just fine. Make a note
3335 that this set is eligible to be root. */
3336 cset->rootable = 1;
3337 /* XXX do this here? */
3338 raidPtrs[raidID]->root_partition = 1;
3339 }
3340 }
3341
3342 /* 5. Cleanup */
3343 free(config, M_RAIDFRAME);
3344
3345 *unit = raidID;
3346 return(retcode);
3347 }
3348
3349 void
3350 rf_disk_unbusy(desc)
3351 RF_RaidAccessDesc_t *desc;
3352 {
3353 struct buf *bp;
3354
3355 bp = (struct buf *)desc->bp;
3356 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3357 (bp->b_bcount - bp->b_resid));
3358 }
3359