/*	$NetBSD: rf_netbsdkintf.c,v 1.117.6.1 2002/05/16 11:47:15 gehenna Exp $	*/
/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */




/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.1 2002/05/16 11:47:15 gehenna Exp $");

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <sys/reboot.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include "raid.h"
#include "opt_raid_autoconfig.h"
#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugMem.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_debugprint.h"
#include "rf_threadstuff.h"

int	rf_kdebug_level = 0;

#ifdef DEBUG
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else	/* DEBUG */
#define db1_printf(a) { }
#endif	/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

void raidattach(int);

dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, D_DISK
};

/*
 * Pilfered from ccd.c
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;	/* the request that this was part of.. */
};
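/*
 * Because the embedded struct buf is the first member, the completion
 * path (KernelWakeupFunc() below) can cast the struct buf * it is handed
 * back to a struct raidbuf * to recover the original bp and the
 * RF_DiskQueueData_t request.
 */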

/* component buffer pool */
struct pool raidframe_cbufpool;

#define	RAIDGETBUF(rs)	pool_get(&raidframe_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
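/*
 * Note that pool_get() is called with PR_NOWAIT, so RAIDGETBUF() may
 * return NULL under memory pressure; rf_DispatchKernelIO() below does
 * not currently check for that.
 */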

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

struct raid_softc {
	int sc_flags;			/* flags */
	int sc_cflags;			/* configuration flags */
	size_t sc_size;			/* size of the raid device */
	char sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
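/*
 * RAIDF_LOCKED and RAIDF_WANTED implement the simple sleep lock used by
 * raidlock()/raidunlock() to serialize open, close, labelling and
 * unconfiguration of a unit.
 */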

#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif
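/*
 * With the default of 6 outstanding I/Os, the 32k-stripe example above
 * works out to roughly 6 * 128K == 768K of parity/data buffers on top
 * of the 384K of incoming write data.
 */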

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
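/*
 * RAIDLABELDEV() constructs the dev_t for the raw partition of a given
 * raid unit; it is what writedisklabel() is handed in the DIOCWDINFO
 * path in raidioctl() below, so the label is always written via the
 * raw partition.
 */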

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
void rf_mountroot_hook(struct device *);

struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr);  */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file. */
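/*
 * (i.e. a kernel config with "options RAID_AUTOCONFIG" generates
 * opt_raid_autoconfig.h, and the #ifdef RAID_AUTOCONFIG block in
 * raidattach() below then forces raidautoconfig to 1.)
 */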

void
raidattach(num)
	int num;
{
	int raidID;
	int i, rc;
	RF_AutoConfig_t *ac_list;	/* autoconfig list */
	RF_ConfigSet_t *config_sets;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!\n");
	}

	/* Initialize the component buffer pool. */
	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!\n");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
	    M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
	    M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!\n");
	}

	for (raidID = 0; raidID < num; raidID++) {
		BUFQ_INIT(&raid_softc[raidID].buf_queue);

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
		    (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	if (raidautoconfig) {
		/* 1. locate all RAID components on the system */

#if DEBUG
		printf("Searching for raid components...\n");
#endif
		ac_list = rf_find_raid_components();

		/* 2. sort them into their respective sets */

		config_sets = rf_create_auto_sets(ac_list);

		/* 3. evaluate each set and configure the valid ones
		   This gets done in rf_buildroothack() */

		/* schedule the creation of the thread to do the
		   "/ on RAID" stuff */

		kthread_create(rf_buildroothack,config_sets);

#if 0
		mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
#endif
	}

}

void
rf_buildroothack(arg)
	void *arg;
{
	RF_ConfigSet_t *config_sets = arg;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int num_root;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#if DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	if (boothowto & RB_ASKNAME) {
		/* We don't auto-config... */
	} else {
		/* They didn't ask, and we found something bootable... */

		if (num_root == 1) {
			booted_device = &raidrootdev[rootID];
		} else if (num_root > 1) {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}


int
raidsize(dev)
	dev_t dev;
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);

}
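/*
 * Note that raidsize() (the d_psize entry in raid_bdevsw above) only
 * reports a size (in DEV_BSIZE units) for FS_SWAP partitions; anything
 * else gets -1, which keeps the swap/dump code off non-swap partitions.
 */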

int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    unit, part));


	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
#if 0
		printf("Last one on raid%d.  Updating status.\n",unit);
#endif
		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}

void
raidstrategy(bp)
	struct buf *bp;
{
	int s;

	unsigned int raidID = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	struct raid_softc *rs = &raid_softc[raidID];
	struct disklabel *lp;
	int wlabel;

	if ((rs->sc_flags & RAIDF_INITED) ==0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	raidPtr = raidPtrs[raidID];
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		biodone(bp);
		return;
	}
	lp = rs->sc_dkdev.dk_label;

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
			    (int) bp->b_blkno, (int) wlabel));
			biodone(bp);
			return;
		}
	s = splbio();

	bp->b_resid = 0;

	/* stuff it onto our queue */
	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);

	raidstart(raidPtrs[raidID]);

	splx(s);
}
/* ARGSUSED */
int
raidread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	part = DISKPART(dev);

	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
int
raidwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	db1_printf(("raidwrite\n"));
	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

int
raidioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int row;
	int column;
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t hot_spare;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, (int) cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {

		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
		    sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
			    retcode));
			return (retcode);
		}
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific,
			    (caddr_t) specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
				    retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * If somebody has a partition mounted, we shouldn't
		 * shutdown.
		 */

		part = DISKPART(dev);
		pmask = (1 << part);
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
		    (rs->sc_dkdev.dk_copenmask & pmask))) {
			raidunlock(rs);
			return (EBUSY);
		}

		retcode = rf_Shutdown(raidPtr);

		/* It's no longer initialized... */
		rs->sc_flags &= ~RAIDF_INITED;

		/* Detach the disk. */
		disk_detach(&rs->sc_dkdev);

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
		    (RF_ComponentLabel_t *));
		if (clabel == NULL)
			return (ENOMEM);

		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));

		retcode = copyin( *clabel_ptr, clabel,
		    sizeof(RF_ComponentLabel_t));

		if (retcode) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(retcode);
		}

		row = clabel->row;
		column = clabel->column;

		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(EINVAL);
		}

		raidread_component_label(raidPtr->Disks[row][column].dev,
		    raidPtr->raid_cinfo[row][column].ci_vp,
		    clabel );

		retcode = copyout((caddr_t) clabel,
		    (caddr_t) *clabel_ptr,
		    sizeof(RF_ComponentLabel_t));
		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
		return (retcode);

	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		printf("Got component label:\n");
		printf("Version: %d\n",clabel->version);
		printf("Serial Number: %d\n",clabel->serial_number);
		printf("Mod counter: %d\n",clabel->mod_counter);
		printf("Row: %d\n", clabel->row);
		printf("Column: %d\n", clabel->column);
		printf("Num Rows: %d\n", clabel->num_rows);
		printf("Num Columns: %d\n", clabel->num_columns);
		printf("Clean: %d\n", clabel->clean);
		printf("Status: %d\n", clabel->status);

		row = clabel->row;
		column = clabel->column;

		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
#if 0
		raidwrite_component_label(
		    raidPtr->Disks[row][column].dev,
		    raidPtr->raid_cinfo[row][column].ci_vp,
		    clabel );
#endif
		return (0);

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		raid_init_component_label(raidPtr, &ci_label);
		ci_label.serial_number = clabel->serial_number;

		for(row=0;row<raidPtr->numRow;row++) {
			ci_label.row = row;
			for(column=0;column<raidPtr->numCol;column++) {
				diskPtr = &raidPtr->Disks[row][column];
				if (!RF_DEAD_DISK(diskPtr->status)) {
					ci_label.partitionSize = diskPtr->partitionSize;
					ci_label.column = column;
					raidwrite_component_label(
					    raidPtr->Disks[row][column].dev,
					    raidPtr->raid_cinfo[row][column].ci_vp,
					    &ci_label );
				}
			}
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("New autoconfig value is: %d\n", d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("New rootpartition value is: %d\n", d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread,
		    raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		row = component.row;
		column = component.column;
		printf("Rebuild: %d %d\n",row, column);
		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->row = row;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconstructInPlaceThread,
		    rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
		    (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
		d_cfg->rows = raidPtr->numRow;
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (i = 0; i < d_cfg->rows; i++) {
			for (j = 0; j < d_cfg->cols; j++) {
				d_cfg->devs[d] = raidPtr->Disks[i][j];
				d++;
			}
		}
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[0][j];
		}
		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
		    sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;

		if (rr->row < 0 || rr->row >= raidPtr->numRow
		    || rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		printf("raid%d: Failing the disk: row: %d col: %d\n",
		    unit, rr->row, rr->col);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		bcopy(rr, rrcopy, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconThread,
		    rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread,
		    raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		row = 0; /* XXX we only consider a single row... */
		if (raidPtr->status[row] != rf_rs_reconstructing)
			*(int *) data = 100;
		else
			*(int *) data = raidPtr->reconControl[row]->percentComplete;
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		row = 0; /* XXX we only consider a single row... */
		if (raidPtr->status[row] != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
			    raidPtr->reconControl[row]->numRUsTotal;
			progressInfo.completed =
			    raidPtr->reconControl[row]->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		}
		retcode = copyout((caddr_t) &progressInfo,
		    (caddr_t) *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
			    raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout((caddr_t) &progressInfo,
		    (caddr_t) *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
			    raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
			    progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout((caddr_t) &progressInfo,
		    (caddr_t) *progressInfoPtr,
		    sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(raidPtr)
	RF_Raid_t *raidPtr;
{
	struct raid_softc *rs;
	int unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

}

/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		raidPtr->numNewFailures--;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s = 0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
		    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
			queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
1883 static void
1884 KernelWakeupFunc(vbp)
1885 struct buf *vbp;
1886 {
1887 RF_DiskQueueData_t *req = NULL;
1888 RF_DiskQueue_t *queue;
1889 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1890 struct buf *bp;
1891 struct raid_softc *rs;
1892 int unit;
1893 int s;
1894
1895 s = splbio();
1896 db1_printf(("recovering the request queue:\n"));
1897 req = raidbp->req;
1898
1899 bp = raidbp->rf_obp;
1900
1901 queue = (RF_DiskQueue_t *) req->queue;
1902
1903 if (raidbp->rf_buf.b_flags & B_ERROR) {
1904 bp->b_flags |= B_ERROR;
1905 bp->b_error = raidbp->rf_buf.b_error ?
1906 raidbp->rf_buf.b_error : EIO;
1907 }
1908
1909 /* XXX methinks this could be wrong... */
1910 #if 1
1911 bp->b_resid = raidbp->rf_buf.b_resid;
1912 #endif
1913
1914 if (req->tracerec) {
1915 RF_ETIMER_STOP(req->tracerec->timer);
1916 RF_ETIMER_EVAL(req->tracerec->timer);
1917 RF_LOCK_MUTEX(rf_tracing_mutex);
1918 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1919 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1920 req->tracerec->num_phys_ios++;
1921 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1922 }
1923 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1924
1925 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1926
1927
1928 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1929 * ballistic, and mark the component as hosed... */
1930
1931 if (bp->b_flags & B_ERROR) {
1932 /* Mark the disk as dead */
1933 /* but only mark it once... */
1934 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1935 rf_ds_optimal) {
1936 printf("raid%d: IO Error. Marking %s as failed.\n",
1937 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1938 queue->raidPtr->Disks[queue->row][queue->col].status =
1939 rf_ds_failed;
1940 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1941 queue->raidPtr->numFailures++;
1942 queue->raidPtr->numNewFailures++;
1943 } else { /* Disk is already dead... */
1944 /* printf("Disk already marked as dead!\n"); */
1945 }
1946
1947 }
1948
1949 rs = &raid_softc[unit];
1950 RAIDPUTBUF(rs, raidbp);
1951
1952 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1953 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1954
1955 splx(s);
1956 }
1957
1958
1959
1960 /*
1961 * initialize a buf structure for doing an I/O in the kernel.
1962 */
1963 static void
1964 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1965 logBytesPerSector, b_proc)
1966 struct buf *bp;
1967 struct vnode *b_vp;
1968 unsigned rw_flag;
1969 dev_t dev;
1970 RF_SectorNum_t startSect;
1971 RF_SectorCount_t numSect;
1972 caddr_t buf;
1973 void (*cbFunc) (struct buf *);
1974 void *cbArg;
1975 int logBytesPerSector;
1976 struct proc *b_proc;
1977 {
1978 /* bp->b_flags = B_PHYS | rw_flag; */
1979 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1980 bp->b_bcount = numSect << logBytesPerSector;
1981 bp->b_bufsize = bp->b_bcount;
1982 bp->b_error = 0;
1983 bp->b_dev = dev;
1984 bp->b_data = buf;
1985 bp->b_blkno = startSect;
1986 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1987 if (bp->b_bcount == 0) {
1988 panic("bp->b_bcount is zero in InitBP!!\n");
1989 }
1990 bp->b_proc = b_proc;
1991 bp->b_iodone = cbFunc;
1992 bp->b_vp = b_vp;
1993
1994 }
1995
1996 static void
1997 raidgetdefaultlabel(raidPtr, rs, lp)
1998 RF_Raid_t *raidPtr;
1999 struct raid_softc *rs;
2000 struct disklabel *lp;
2001 {
2002 db1_printf(("Building a default label...\n"));
2003 memset(lp, 0, sizeof(*lp));
2004
2005 /* fabricate a label... */
2006 lp->d_secperunit = raidPtr->totalSectors;
2007 lp->d_secsize = raidPtr->bytesPerSector;
2008 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2009 lp->d_ntracks = 4 * raidPtr->numCol;
2010 lp->d_ncylinders = raidPtr->totalSectors /
2011 (lp->d_nsectors * lp->d_ntracks);
2012 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2013
2014 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2015 lp->d_type = DTYPE_RAID;
2016 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2017 lp->d_rpm = 3600;
2018 lp->d_interleave = 1;
2019 lp->d_flags = 0;
2020
2021 lp->d_partitions[RAW_PART].p_offset = 0;
2022 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2023 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2024 lp->d_npartitions = RAW_PART + 1;
2025
2026 lp->d_magic = DISKMAGIC;
2027 lp->d_magic2 = DISKMAGIC;
2028 lp->d_checksum = dkcksum(lp);
2029
2030 }
2031 /*
2032 * Read the disklabel from the raid device. If one is not present, fake one
2033 * up.
2034 */
2035 static void
2036 raidgetdisklabel(dev)
2037 dev_t dev;
2038 {
2039 int unit = raidunit(dev);
2040 struct raid_softc *rs = &raid_softc[unit];
2041 char *errstring;
2042 struct disklabel *lp = rs->sc_dkdev.dk_label;
2043 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2044 RF_Raid_t *raidPtr;
2045
2046 db1_printf(("Getting the disklabel...\n"));
2047
2048 memset(clp, 0, sizeof(*clp));
2049
2050 raidPtr = raidPtrs[unit];
2051
2052 raidgetdefaultlabel(raidPtr, rs, lp);
2053
2054 /*
2055 * Call the generic disklabel extraction routine.
2056 */
2057 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2058 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2059 if (errstring)
2060 raidmakedisklabel(rs);
2061 else {
2062 int i;
2063 struct partition *pp;
2064
2065 /*
2066 * Sanity check whether the found disklabel is valid.
2067 *
2068 * This is necessary since the total size of the raid device
2069 * may vary when the interleave is changed even though exactly
2070 * the same components are used, and an old disklabel may be
2071 * used if one is found.
2072 */
2073 if (lp->d_secperunit != rs->sc_size)
2074 printf("WARNING: %s: "
2075 "total sector size in disklabel (%d) != "
2076 "the size of raid (%ld)\n", rs->sc_xname,
2077 lp->d_secperunit, (long) rs->sc_size);
2078 for (i = 0; i < lp->d_npartitions; i++) {
2079 pp = &lp->d_partitions[i];
2080 if (pp->p_offset + pp->p_size > rs->sc_size)
2081 printf("WARNING: %s: end of partition `%c' "
2082 "exceeds the size of raid (%ld)\n",
2083 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2084 }
2085 }
2086
2087 }
2088 /*
2089 * Take care of things one might want to take care of in the event
2090 * that a disklabel isn't present.
2091 */
2092 static void
2093 raidmakedisklabel(rs)
2094 struct raid_softc *rs;
2095 {
2096 struct disklabel *lp = rs->sc_dkdev.dk_label;
2097 db1_printf(("Making a label..\n"));
2098
2099 /*
2100 * For historical reasons, if there's no disklabel present
2101 * the raw partition must be marked FS_BSDFFS.
2102 */
2103
2104 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2105
2106 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2107
2108 lp->d_checksum = dkcksum(lp);
2109 }
2110 /*
2111 * Lookup the provided name in the filesystem. If the file exists,
2112 * is a valid block device, and isn't being used by anyone else,
2113 * set *vpp to the file's vnode.
2114 * You'll find the original of this in ccd.c
2115 */
2116 int
2117 raidlookup(path, p, vpp)
2118 char *path;
2119 struct proc *p;
2120 struct vnode **vpp; /* result */
2121 {
2122 struct nameidata nd;
2123 struct vnode *vp;
2124 struct vattr va;
2125 int error;
2126
2127 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2128 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2129 #ifdef DEBUG
2130 printf("RAIDframe: vn_open returned %d\n", error);
2131 #endif
2132 return (error);
2133 }
2134 vp = nd.ni_vp;
2135 if (vp->v_usecount > 1) {
2136 VOP_UNLOCK(vp, 0);
2137 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2138 return (EBUSY);
2139 }
2140 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2141 VOP_UNLOCK(vp, 0);
2142 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2143 return (error);
2144 }
2145 /* XXX: eventually we should handle VREG, too. */
2146 if (va.va_type != VBLK) {
2147 VOP_UNLOCK(vp, 0);
2148 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2149 return (ENOTBLK);
2150 }
2151 VOP_UNLOCK(vp, 0);
2152 *vpp = vp;
2153 return (0);
2154 }
2155 /*
2156 * Wait interruptibly for an exclusive lock.
2157 *
2158 * XXX
2159 * Several drivers do this; it should be abstracted and made MP-safe.
2160 * (Hmm... where have we seen this warning before :-> GO )
2161 */
2162 static int
2163 raidlock(rs)
2164 struct raid_softc *rs;
2165 {
2166 int error;
2167
2168 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2169 rs->sc_flags |= RAIDF_WANTED;
2170 if ((error =
2171 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2172 return (error);
2173 }
2174 rs->sc_flags |= RAIDF_LOCKED;
2175 return (0);
2176 }
2177 /*
2178 * Unlock and wake up any waiters.
2179 */
2180 static void
2181 raidunlock(rs)
2182 struct raid_softc *rs;
2183 {
2184
2185 rs->sc_flags &= ~RAIDF_LOCKED;
2186 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2187 rs->sc_flags &= ~RAIDF_WANTED;
2188 wakeup(rs);
2189 }
2190 }
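/*
 * A minimal usage sketch (callers elsewhere in this driver, e.g. the
 * open/close paths, use the lock roughly like this):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... modify the softc ...
 *	raidunlock(rs);
 */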
2191
2192
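/*
 * The component label is stored RF_COMPONENT_INFO_OFFSET bytes into each
 * component (within the region RAIDframe reserves for itself at the start
 * of every component; see the rf_protected_sectors note in
 * raidread_component_label()), and RF_COMPONENT_INFO_SIZE bytes are set
 * aside for it.
 */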
2193 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2194 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2195
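/*
 * raidmarkclean() and raidmarkdirty() read the component label from the
 * given component, store the supplied mod_counter, set the clean flag to
 * RF_RAID_CLEAN or RF_RAID_DIRTY respectively, and write the label back.
 */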
2196 int
2197 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2198 {
2199 RF_ComponentLabel_t clabel;
2200 raidread_component_label(dev, b_vp, &clabel);
2201 clabel.mod_counter = mod_counter;
2202 clabel.clean = RF_RAID_CLEAN;
2203 raidwrite_component_label(dev, b_vp, &clabel);
2204 return(0);
2205 }
2206
2207
2208 int
2209 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2210 {
2211 RF_ComponentLabel_t clabel;
2212 raidread_component_label(dev, b_vp, &clabel);
2213 clabel.mod_counter = mod_counter;
2214 clabel.clean = RF_RAID_DIRTY;
2215 raidwrite_component_label(dev, b_vp, &clabel);
2216 return(0);
2217 }
2218
2219 /* ARGSUSED */
2220 int
2221 raidread_component_label(dev, b_vp, clabel)
2222 dev_t dev;
2223 struct vnode *b_vp;
2224 RF_ComponentLabel_t *clabel;
2225 {
2226 struct buf *bp;
2227 const struct bdevsw *bdev;
2228 int error;
2229
2230 /* XXX should probably ensure that we don't try to do this if
2231 someone has changed rf_protected_sectors. */
2232
2233 if (b_vp == NULL) {
2234 /* For whatever reason, this component is not valid.
2235 Don't try to read a component label from it. */
2236 return(EINVAL);
2237 }
2238
2239 /* get a block of the appropriate size... */
2240 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2241 bp->b_dev = dev;
2242
2243 /* get our ducks in a row for the read */
2244 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2245 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2246 bp->b_flags |= B_READ;
2247 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2248
2249 bdev = bdevsw_lookup(bp->b_dev);
2250 if (bdev == NULL) {
2251 	brelse(bp);	/* release the buffer allocated by geteblk() above */
 	return (ENXIO);
 }
2252 (*bdev->d_strategy)(bp);
2253
2254 error = biowait(bp);
2255
2256 if (!error) {
2257 memcpy(clabel, bp->b_data,
2258 sizeof(RF_ComponentLabel_t));
2259 #if 0
2260 rf_print_component_label( clabel );
2261 #endif
2262 } else {
2263 #if 0
2264 printf("Failed to read RAID component label!\n");
2265 #endif
2266 }
2267
2268 brelse(bp);
2269 return(error);
2270 }
2271 /* ARGSUSED */
2272 int
2273 raidwrite_component_label(dev, b_vp, clabel)
2274 dev_t dev;
2275 struct vnode *b_vp;
2276 RF_ComponentLabel_t *clabel;
2277 {
2278 struct buf *bp;
2279 const struct bdevsw *bdev;
2280 int error;
2281
2282 /* get a block of the appropriate size... */
2283 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2284 bp->b_dev = dev;
2285
2286 /* get our ducks in a row for the write */
2287 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2288 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2289 bp->b_flags |= B_WRITE;
2290 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2291
2292 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2293
2294 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2295
2296 bdev = bdevsw_lookup(bp->b_dev);
2297 if (bdev == NULL) {
2298 	brelse(bp);	/* release the buffer allocated by geteblk() above */
 	return (ENXIO);
 }
2299 (*bdev->d_strategy)(bp);
2300 error = biowait(bp);
2301 brelse(bp);
2302 if (error) {
2303 #if 1
2304 printf("Failed to write RAID component info!\n");
2305 #endif
2306 }
2307
2308 return(error);
2309 }
2310
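/*
 * Bump the array's modification counter and mark the component label of
 * every non-failed component as dirty; components whose label says they
 * are spared are left untouched.
 */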
2311 void
2312 rf_markalldirty(raidPtr)
2313 RF_Raid_t *raidPtr;
2314 {
2315 RF_ComponentLabel_t clabel;
2316 int r,c;
2317
2318 raidPtr->mod_counter++;
2319 for (r = 0; r < raidPtr->numRow; r++) {
2320 for (c = 0; c < raidPtr->numCol; c++) {
2321 /* we don't want to touch (at all) a disk that has
2322 failed */
2323 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2324 raidread_component_label(
2325 raidPtr->Disks[r][c].dev,
2326 raidPtr->raid_cinfo[r][c].ci_vp,
2327 &clabel);
2328 if (clabel.status == rf_ds_spared) {
2329 /* XXX do something special...
2330 but whatever you do, don't
2331 try to access it!! */
2332 } else {
2333 #if 0
2334 clabel.status =
2335 raidPtr->Disks[r][c].status;
2336 raidwrite_component_label(
2337 raidPtr->Disks[r][c].dev,
2338 raidPtr->raid_cinfo[r][c].ci_vp,
2339 &clabel);
2340 #endif
2341 raidmarkdirty(
2342 raidPtr->Disks[r][c].dev,
2343 raidPtr->raid_cinfo[r][c].ci_vp,
2344 raidPtr->mod_counter);
2345 }
2346 }
2347 }
2348 }
2349 /* printf("Component labels marked dirty.\n"); */
2350 #if 0
2351 for( c = 0; c < raidPtr->numSpare ; c++) {
2352 sparecol = raidPtr->numCol + c;
2353 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2354 /*
2355
2356 XXX this is where we get fancy and map this spare
2357 into its correct spot in the array.
2358
2359 */
2360 /*
2361
2362 we claim this disk is "optimal" if it's
2363 rf_ds_used_spare, as that means it should be
2364 directly substitutable for the disk it replaced.
2365 We note that too...
2366
2367 */
2368
2369 for(i=0;i<raidPtr->numRow;i++) {
2370 for(j=0;j<raidPtr->numCol;j++) {
2371 if ((raidPtr->Disks[i][j].spareRow ==
2372 r) &&
2373 (raidPtr->Disks[i][j].spareCol ==
2374 sparecol)) {
2375 srow = r;
2376 scol = sparecol;
2377 break;
2378 }
2379 }
2380 }
2381
2382 raidread_component_label(
2383 raidPtr->Disks[r][sparecol].dev,
2384 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2385 &clabel);
2386 /* make sure status is noted */
2387 clabel.version = RF_COMPONENT_LABEL_VERSION;
2388 clabel.mod_counter = raidPtr->mod_counter;
2389 clabel.serial_number = raidPtr->serial_number;
2390 clabel.row = srow;
2391 clabel.column = scol;
2392 clabel.num_rows = raidPtr->numRow;
2393 clabel.num_columns = raidPtr->numCol;
2394 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2395 clabel.status = rf_ds_optimal;
2396 raidwrite_component_label(
2397 raidPtr->Disks[r][sparecol].dev,
2398 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2399 &clabel);
2400 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2401 raidPtr->raid_cinfo[r][sparecol].ci_vp,
 raidPtr->mod_counter);
2402 }
2403 }
2404
2405 #endif
2406 }
2407
2408
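/*
 * Rewrite the component labels of all optimal components (and of any
 * in-use spares) with the current status and mod_counter.  If this is
 * the final update (RF_FINAL_COMPONENT_UPDATE) and parity is known to be
 * good, the components are also marked clean.
 */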
2409 void
2410 rf_update_component_labels(raidPtr, final)
2411 RF_Raid_t *raidPtr;
2412 int final;
2413 {
2414 RF_ComponentLabel_t clabel;
2415 int sparecol;
2416 int r,c;
2417 int i,j;
2418 int srow, scol;
2419
2420 srow = -1;
2421 scol = -1;
2422
2423 /* XXX should do extra checks to make sure things really are clean,
2424 rather than blindly setting the clean bit... */
2425
2426 raidPtr->mod_counter++;
2427
2428 for (r = 0; r < raidPtr->numRow; r++) {
2429 for (c = 0; c < raidPtr->numCol; c++) {
2430 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2431 raidread_component_label(
2432 raidPtr->Disks[r][c].dev,
2433 raidPtr->raid_cinfo[r][c].ci_vp,
2434 &clabel);
2435 /* make sure status is noted */
2436 clabel.status = rf_ds_optimal;
2437 /* bump the counter */
2438 clabel.mod_counter = raidPtr->mod_counter;
2439
2440 raidwrite_component_label(
2441 raidPtr->Disks[r][c].dev,
2442 raidPtr->raid_cinfo[r][c].ci_vp,
2443 &clabel);
2444 if (final == RF_FINAL_COMPONENT_UPDATE) {
2445 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2446 raidmarkclean(
2447 raidPtr->Disks[r][c].dev,
2448 raidPtr->raid_cinfo[r][c].ci_vp,
2449 raidPtr->mod_counter);
2450 }
2451 }
2452 }
2453 /* else we don't touch it.. */
2454 }
2455 }
2456
2457 for( c = 0; c < raidPtr->numSpare ; c++) {
2458 sparecol = raidPtr->numCol + c;
2459 /* Need to ensure that the reconstruct actually completed! */
2460 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2461 /*
2462
2463 we claim this disk is "optimal" if it's
2464 rf_ds_used_spare, as that means it should be
2465 directly substitutable for the disk it replaced.
2466 We note that too...
2467
2468 */
2469
2470 for(i=0;i<raidPtr->numRow;i++) {
2471 for(j=0;j<raidPtr->numCol;j++) {
2472 if ((raidPtr->Disks[i][j].spareRow ==
2473 0) &&
2474 (raidPtr->Disks[i][j].spareCol ==
2475 sparecol)) {
2476 srow = i;
2477 scol = j;
2478 break;
2479 }
2480 }
2481 }
2482
2483 /* XXX shouldn't *really* need this... */
2484 raidread_component_label(
2485 raidPtr->Disks[0][sparecol].dev,
2486 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2487 &clabel);
2488 /* make sure status is noted */
2489
2490 raid_init_component_label(raidPtr, &clabel);
2491
2492 clabel.mod_counter = raidPtr->mod_counter;
2493 clabel.row = srow;
2494 clabel.column = scol;
2495 clabel.status = rf_ds_optimal;
2496
2497 raidwrite_component_label(
2498 raidPtr->Disks[0][sparecol].dev,
2499 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2500 &clabel);
2501 if (final == RF_FINAL_COMPONENT_UPDATE) {
2502 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2503 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2504 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2505 raidPtr->mod_counter);
2506 }
2507 }
2508 }
2509 }
2510 /* printf("Component labels updated\n"); */
2511 }
2512
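/*
 * Close the vnode of a single component.  Auto-configured components
 * were opened with NOCRED, so they are closed via VOP_CLOSE()/vput();
 * everything else goes through vn_close() with the engine thread's
 * credentials.
 */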
2513 void
2514 rf_close_component(raidPtr, vp, auto_configured)
2515 RF_Raid_t *raidPtr;
2516 struct vnode *vp;
2517 int auto_configured;
2518 {
2519 struct proc *p;
2520
2521 p = raidPtr->engine_thread;
2522
2523 if (vp != NULL) {
2524 if (auto_configured == 1) {
2525 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2526 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2527 vput(vp);
2528
2529 } else {
2530 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2531 }
2532 } else {
2533 printf("vnode was NULL\n");
2534 }
2535 }
2536
2537
2538 void
2539 rf_UnconfigureVnodes(raidPtr)
2540 RF_Raid_t *raidPtr;
2541 {
2542 int r,c;
2543 struct proc *p;
2544 struct vnode *vp;
2545 int acd;
2546
2547
2548 /* We take this opportunity to close the vnodes like we should.. */
2549
2550 p = raidPtr->engine_thread;
2551
2552 for (r = 0; r < raidPtr->numRow; r++) {
2553 for (c = 0; c < raidPtr->numCol; c++) {
2554 printf("Closing vnode for row: %d col: %d\n", r, c);
2555 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2556 acd = raidPtr->Disks[r][c].auto_configured;
2557 rf_close_component(raidPtr, vp, acd);
2558 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2559 raidPtr->Disks[r][c].auto_configured = 0;
2560 }
2561 }
2562 for (r = 0; r < raidPtr->numSpare; r++) {
2563 printf("Closing vnode for spare: %d\n", r);
2564 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2565 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2566 rf_close_component(raidPtr, vp, acd);
2567 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2568 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2569 }
2570 }
2571
2572
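/*
 * Kernel thread body for a requested reconstruction: fail the indicated
 * component and, if RF_FDFLAGS_RECON is set, reconstruct its contents
 * onto a spare.
 */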
2573 void
2574 rf_ReconThread(req)
2575 struct rf_recon_req *req;
2576 {
2577 int s;
2578 RF_Raid_t *raidPtr;
2579
2580 s = splbio();
2581 raidPtr = (RF_Raid_t *) req->raidPtr;
2582 raidPtr->recon_in_progress = 1;
2583
2584 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2585 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2586
2587 /* XXX get rid of this! we don't need it at all.. */
2588 RF_Free(req, sizeof(*req));
2589
2590 raidPtr->recon_in_progress = 0;
2591 splx(s);
2592
2593 /* That's all... */
2594 kthread_exit(0); /* does not return */
2595 }
2596
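/*
 * Kernel thread body for rewriting parity.  On success, parity is noted
 * as good so that the clean bits can be set at shutdown time.
 */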
2597 void
2598 rf_RewriteParityThread(raidPtr)
2599 RF_Raid_t *raidPtr;
2600 {
2601 int retcode;
2602 int s;
2603
2604 raidPtr->parity_rewrite_in_progress = 1;
2605 s = splbio();
2606 retcode = rf_RewriteParity(raidPtr);
2607 splx(s);
2608 if (retcode) {
2609 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2610 } else {
2611 /* set the clean bit! If we shut down correctly,
2612 the clean bit on each component label will get
2613 set */
2614 raidPtr->parity_good = RF_RAID_CLEAN;
2615 }
2616 raidPtr->parity_rewrite_in_progress = 0;
2617
2618 /* Anyone waiting for us to stop? If so, inform them... */
2619 if (raidPtr->waitShutdown) {
2620 wakeup(&raidPtr->parity_rewrite_in_progress);
2621 }
2622
2623 /* That's all... */
2624 kthread_exit(0); /* does not return */
2625 }
2626
2627
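/*
 * Kernel thread body for copying reconstructed data from a spare back
 * onto a replaced component.
 */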
2628 void
2629 rf_CopybackThread(raidPtr)
2630 RF_Raid_t *raidPtr;
2631 {
2632 int s;
2633
2634 raidPtr->copyback_in_progress = 1;
2635 s = splbio();
2636 rf_CopybackReconstructedData(raidPtr);
2637 splx(s);
2638 raidPtr->copyback_in_progress = 0;
2639
2640 /* That's all... */
2641 kthread_exit(0); /* does not return */
2642 }
2643
2644
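/*
 * Kernel thread body for reconstructing a component "in place", i.e.
 * directly onto the indicated component rather than onto a hot spare.
 */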
2645 void
2646 rf_ReconstructInPlaceThread(req)
2647 struct rf_recon_req *req;
2648 {
2649 int retcode;
2650 int s;
2651 RF_Raid_t *raidPtr;
2652
2653 s = splbio();
2654 raidPtr = req->raidPtr;
2655 raidPtr->recon_in_progress = 1;
2656 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2657 RF_Free(req, sizeof(*req));
2658 raidPtr->recon_in_progress = 0;
2659 splx(s);
2660
2661 /* That's all... */
2662 kthread_exit(0); /* does not return */
2663 }
2664
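/* Mountroot hook for the raid device; currently a no-op. */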
2665 void
2666 rf_mountroot_hook(dev)
2667 struct device *dev;
2668 {
2669
2670 }
2671
2672
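/*
 * Scan all disk devices in the system, looking for partitions of type
 * FS_RAID.  Read the RAIDframe component label from each such partition
 * and return a list of RF_AutoConfig_t entries describing the candidate
 * components found.
 */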
2673 RF_AutoConfig_t *
2674 rf_find_raid_components()
2675 {
2676 struct vnode *vp;
2677 struct disklabel label;
2678 struct device *dv;
2679 dev_t dev;
2680 int bmajor;
2681 int error;
2682 int i;
2683 int good_one;
2684 RF_ComponentLabel_t *clabel;
2685 RF_AutoConfig_t *ac_list;
2686 RF_AutoConfig_t *ac;
2687
2688
2689 /* initialize the AutoConfig list */
2690 ac_list = NULL;
2691
2692 /* we begin by trolling through *all* the devices on the system */
2693
2694 for (dv = alldevs.tqh_first; dv != NULL;
2695 dv = dv->dv_list.tqe_next) {
2696
2697 /* we are only interested in disks... */
2698 if (dv->dv_class != DV_DISK)
2699 continue;
2700
2701 /* we don't care about floppies... */
2702 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2703 continue;
2704 }
2705
2706 /* need to find the device_name_to_block_device_major stuff */
2707 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2708
2709 /* get a vnode for the raw partition of this disk */
2710
2711 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2712 if (bdevvp(dev, &vp))
2713 panic("RAID can't alloc vnode");
2714
2715 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2716
2717 if (error) {
2718 /* "Who cares." Continue looking
2719 for something that exists*/
2720 vput(vp);
2721 continue;
2722 }
2723
2724 /* Ok, the disk exists. Go get the disklabel. */
2725 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2726 FREAD, NOCRED, 0);
2727 if (error) {
2728 /*
2729 * XXX can't happen - open() would
2730 * have errored out (or faked up one)
2731 */
2732 printf("can't get label for dev %s%c (%d)!?!?\n",
2733 dv->dv_xname, 'a' + RAW_PART, error);
2734 }
2735
2736 /* don't need this any more. We'll allocate it again
2737 a little later if we really do... */
2738 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2739 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2740 vput(vp);
2741
2742 for (i=0; i < label.d_npartitions; i++) {
2743 /* We only support partitions marked as RAID */
2744 if (label.d_partitions[i].p_fstype != FS_RAID)
2745 continue;
2746
2747 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2748 if (bdevvp(dev, &vp))
2749 panic("RAID can't alloc vnode");
2750
2751 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2752 if (error) {
2753 /* Whatever... */
2754 vput(vp);
2755 continue;
2756 }
2757
2758 good_one = 0;
2759
2760 clabel = (RF_ComponentLabel_t *)
2761 malloc(sizeof(RF_ComponentLabel_t),
2762 M_RAIDFRAME, M_NOWAIT);
2763 if (clabel == NULL) {
2764 /* XXX CLEANUP HERE */
2765 printf("RAID auto config: out of memory!\n");
2766 return(NULL); /* XXX probably should panic? */
2767 }
2768
2769 if (!raidread_component_label(dev, vp, clabel)) {
2770 /* Got the label. Does it look reasonable? */
2771 if (rf_reasonable_label(clabel) &&
2772 (clabel->partitionSize <=
2773 label.d_partitions[i].p_size)) {
2774 #if DEBUG
2775 printf("Component on: %s%c: %d\n",
2776 dv->dv_xname, 'a'+i,
2777 label.d_partitions[i].p_size);
2778 rf_print_component_label(clabel);
2779 #endif
2780 /* if it's reasonable, add it,
2781 else ignore it. */
2782 ac = (RF_AutoConfig_t *)
2783 malloc(sizeof(RF_AutoConfig_t),
2784 M_RAIDFRAME,
2785 M_NOWAIT);
2786 if (ac == NULL) {
2787 /* XXX should panic?? */
2788 return(NULL);
2789 }
2790
2791 sprintf(ac->devname, "%s%c",
2792 dv->dv_xname, 'a'+i);
2793 ac->dev = dev;
2794 ac->vp = vp;
2795 ac->clabel = clabel;
2796 ac->next = ac_list;
2797 ac_list = ac;
2798 good_one = 1;
2799 }
2800 }
2801 if (!good_one) {
2802 /* cleanup */
2803 free(clabel, M_RAIDFRAME);
2804 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2805 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2806 vput(vp);
2807 }
2808 }
2809 }
2810 return(ac_list);
2811 }
2812
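/*
 * Sanity-check a component label: known version, sane clean flag, and
 * self-consistent row/column/geometry fields.
 */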
2813 static int
2814 rf_reasonable_label(clabel)
2815 RF_ComponentLabel_t *clabel;
2816 {
2817
2818 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2819 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2820 ((clabel->clean == RF_RAID_CLEAN) ||
2821 (clabel->clean == RF_RAID_DIRTY)) &&
2822 clabel->row >=0 &&
2823 clabel->column >= 0 &&
2824 clabel->num_rows > 0 &&
2825 clabel->num_columns > 0 &&
2826 clabel->row < clabel->num_rows &&
2827 clabel->column < clabel->num_columns &&
2828 clabel->blockSize > 0 &&
2829 clabel->numBlocks > 0) {
2830 /* label looks reasonable enough... */
2831 return(1);
2832 }
2833 return(0);
2834 }
2835
2836
2837 void
2838 rf_print_component_label(clabel)
2839 RF_ComponentLabel_t *clabel;
2840 {
2841 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2842 clabel->row, clabel->column,
2843 clabel->num_rows, clabel->num_columns);
2844 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2845 clabel->version, clabel->serial_number,
2846 clabel->mod_counter);
2847 printf(" Clean: %s Status: %d\n",
2848 clabel->clean ? "Yes" : "No", clabel->status );
2849 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2850 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2851 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2852 (char) clabel->parityConfig, clabel->blockSize,
2853 clabel->numBlocks);
2854 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2855 printf(" Contains root partition: %s\n",
2856 clabel->root_partition ? "Yes" : "No" );
2857 printf(" Last configured as: raid%d\n", clabel->last_unit );
2858 #if 0
2859 printf(" Config order: %d\n", clabel->config_order);
2860 #endif
2861
2862 }
2863
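/*
 * Sort the list of discovered components into configuration sets:
 * components whose labels indicate they belong to the same RAID set
 * (as judged by rf_does_it_fit()) end up in the same RF_ConfigSet_t.
 */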
2864 RF_ConfigSet_t *
2865 rf_create_auto_sets(ac_list)
2866 RF_AutoConfig_t *ac_list;
2867 {
2868 RF_AutoConfig_t *ac;
2869 RF_ConfigSet_t *config_sets;
2870 RF_ConfigSet_t *cset;
2871 RF_AutoConfig_t *ac_next;
2872
2873
2874 config_sets = NULL;
2875
2876 /* Go through the AutoConfig list, and figure out which components
2877 belong to what sets. */
2878 ac = ac_list;
2879 while(ac!=NULL) {
2880 /* we're going to putz with ac->next, so save it here
2881 for use at the end of the loop */
2882 ac_next = ac->next;
2883
2884 if (config_sets == NULL) {
2885 /* will need at least this one... */
2886 config_sets = (RF_ConfigSet_t *)
2887 malloc(sizeof(RF_ConfigSet_t),
2888 M_RAIDFRAME, M_NOWAIT);
2889 if (config_sets == NULL) {
2890 panic("rf_create_auto_sets: No memory!\n");
2891 }
2892 /* this one is easy :) */
2893 config_sets->ac = ac;
2894 config_sets->next = NULL;
2895 config_sets->rootable = 0;
2896 ac->next = NULL;
2897 } else {
2898 /* which set does this component fit into? */
2899 cset = config_sets;
2900 while(cset!=NULL) {
2901 if (rf_does_it_fit(cset, ac)) {
2902 /* looks like it matches... */
2903 ac->next = cset->ac;
2904 cset->ac = ac;
2905 break;
2906 }
2907 cset = cset->next;
2908 }
2909 if (cset==NULL) {
2910 /* didn't find a match above... new set..*/
2911 cset = (RF_ConfigSet_t *)
2912 malloc(sizeof(RF_ConfigSet_t),
2913 M_RAIDFRAME, M_NOWAIT);
2914 if (cset == NULL) {
2915 panic("rf_create_auto_sets: No memory!\n");
2916 }
2917 cset->ac = ac;
2918 ac->next = NULL;
2919 cset->next = config_sets;
2920 cset->rootable = 0;
2921 config_sets = cset;
2922 }
2923 }
2924 ac = ac_next;
2925 }
2926
2927
2928 return(config_sets);
2929 }
2930
2931 static int
2932 rf_does_it_fit(cset, ac)
2933 RF_ConfigSet_t *cset;
2934 RF_AutoConfig_t *ac;
2935 {
2936 RF_ComponentLabel_t *clabel1, *clabel2;
2937
2938 /* If this one matches the *first* one in the set, that's good
2939 enough, since the other members of the set would have been
2940 through here too... */
2941 /* note that we are not checking partitionSize here..
2942
2943 Note that we are also not checking the mod_counters here.
2944 If everything else matches except the mod_counter, that's
2945 good enough for this test. We will deal with the mod_counters
2946 a little later in the autoconfiguration process.
2947
2948 (clabel1->mod_counter == clabel2->mod_counter) &&
2949
2950 The reason we don't check for this is that failed disks
2951 will have lower modification counts. If those disks are
2952 not added to the set they used to belong to, then they will
2953 form their own set, which may result in 2 different sets,
2954 for example, competing to be configured at raid0, and
2955 perhaps competing to be the root filesystem set. If the
2956 wrong ones get configured, or both attempt to become /,
2957 weird behaviour and/or serious lossage will occur. Thus we
2958 need to bring them into the fold here, and kick them out at
2959 a later point.
2960
2961 */
2962
2963 clabel1 = cset->ac->clabel;
2964 clabel2 = ac->clabel;
2965 if ((clabel1->version == clabel2->version) &&
2966 (clabel1->serial_number == clabel2->serial_number) &&
2967 (clabel1->num_rows == clabel2->num_rows) &&
2968 (clabel1->num_columns == clabel2->num_columns) &&
2969 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2970 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2971 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2972 (clabel1->parityConfig == clabel2->parityConfig) &&
2973 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2974 (clabel1->blockSize == clabel2->blockSize) &&
2975 (clabel1->numBlocks == clabel2->numBlocks) &&
2976 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2977 (clabel1->root_partition == clabel2->root_partition) &&
2978 (clabel1->last_unit == clabel2->last_unit) &&
2979 (clabel1->config_order == clabel2->config_order)) {
2980 /* if it gets here, it almost *has* to be a match */
2981 } else {
2982 /* it's not consistent with somebody in the set..
2983 punt */
2984 return(0);
2985 }
2986 /* all was fine.. it must fit... */
2987 return(1);
2988 }
2989
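/*
 * Decide whether a configuration set has enough live components to be
 * configured: no missing components for RAID 0, at most one for
 * RAID 4/5, and for RAID 1 at least one member of each even/odd
 * component pair must be present.
 */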
2990 int
2991 rf_have_enough_components(cset)
2992 RF_ConfigSet_t *cset;
2993 {
2994 RF_AutoConfig_t *ac;
2995 RF_AutoConfig_t *auto_config;
2996 RF_ComponentLabel_t *clabel;
2997 int r,c;
2998 int num_rows;
2999 int num_cols;
3000 int num_missing;
3001 int mod_counter;
3002 int mod_counter_found;
3003 int even_pair_failed;
3004 char parity_type;
3005
3006
3007 /* check to see that we have enough 'live' components
3008 of this set. If so, we can configure it if necessary */
3009
3010 num_rows = cset->ac->clabel->num_rows;
3011 num_cols = cset->ac->clabel->num_columns;
3012 parity_type = cset->ac->clabel->parityConfig;
3013
3014 /* XXX Check for duplicate components!?!?!? */
3015
3016 /* Determine what the mod_counter is supposed to be for this set. */
3017
3018 mod_counter_found = 0;
3019 mod_counter = 0;
3020 ac = cset->ac;
3021 while(ac!=NULL) {
3022 if (mod_counter_found==0) {
3023 mod_counter = ac->clabel->mod_counter;
3024 mod_counter_found = 1;
3025 } else {
3026 if (ac->clabel->mod_counter > mod_counter) {
3027 mod_counter = ac->clabel->mod_counter;
3028 }
3029 }
3030 ac = ac->next;
3031 }
3032
3033 num_missing = 0;
3034 auto_config = cset->ac;
3035
3036 for(r=0; r<num_rows; r++) {
3037 even_pair_failed = 0;
3038 for(c=0; c<num_cols; c++) {
3039 ac = auto_config;
3040 while(ac!=NULL) {
3041 if ((ac->clabel->row == r) &&
3042 (ac->clabel->column == c) &&
3043 (ac->clabel->mod_counter == mod_counter)) {
3044 /* it's this one... */
3045 #if DEBUG
3046 printf("Found: %s at %d,%d\n",
3047 ac->devname,r,c);
3048 #endif
3049 break;
3050 }
3051 ac=ac->next;
3052 }
3053 if (ac==NULL) {
3054 /* Didn't find one here! */
3055 /* special case for RAID 1, especially
3056 where there are more than 2
3057 components (where RAIDframe treats
3058 things a little differently :( ) */
3059 if (parity_type == '1') {
3060 if (c%2 == 0) { /* even component */
3061 even_pair_failed = 1;
3062 } else { /* odd component. If
3063 we're failed, and
3064 so is the even
3065 component, it's
3066 "Good Night, Charlie" */
3067 if (even_pair_failed == 1) {
3068 return(0);
3069 }
3070 }
3071 } else {
3072 /* normal accounting */
3073 num_missing++;
3074 }
3075 }
3076 if ((parity_type == '1') && (c%2 == 1)) {
3077 /* Just did an even component, and we didn't
3078 bail.. reset the even_pair_failed flag,
3079 and go on to the next component.... */
3080 even_pair_failed = 0;
3081 }
3082 }
3083 }
3084
3085 clabel = cset->ac->clabel;
3086
3087 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3088 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3089 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3090 /* XXX this needs to be made *much* more general */
3091 /* Too many failures */
3092 return(0);
3093 }
3094 /* otherwise, all is well, and we've got enough to take a kick
3095 at autoconfiguring this set */
3096 return(1);
3097 }
3098
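/*
 * Build an RF_Config_t from the component labels in an autoconfig set,
 * filling in the geometry, queue type, and the device name of each
 * component.
 */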
3099 void
3100 rf_create_configuration(ac,config,raidPtr)
3101 RF_AutoConfig_t *ac;
3102 RF_Config_t *config;
3103 RF_Raid_t *raidPtr;
3104 {
3105 RF_ComponentLabel_t *clabel;
3106 int i;
3107
3108 clabel = ac->clabel;
3109
3110 /* 1. Fill in the common stuff */
3111 config->numRow = clabel->num_rows;
3112 config->numCol = clabel->num_columns;
3113 config->numSpare = 0; /* XXX should this be set here? */
3114 config->sectPerSU = clabel->sectPerSU;
3115 config->SUsPerPU = clabel->SUsPerPU;
3116 config->SUsPerRU = clabel->SUsPerRU;
3117 config->parityConfig = clabel->parityConfig;
3118 /* XXX... */
3119 strcpy(config->diskQueueType,"fifo");
3120 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3121 config->layoutSpecificSize = 0; /* XXX ?? */
3122
3123 while(ac!=NULL) {
3124 /* row/col values will be in range due to the checks
3125 in rf_reasonable_label() */
3126 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3127 ac->devname);
3128 ac = ac->next;
3129 }
3130
3131 for(i=0;i<RF_MAXDBGV;i++) {
3132 config->debugVars[i][0] = '\0';
3133 }
3134 }
3135
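/*
 * Set the autoconfigure flag for the array and push the new value out to
 * the component label of every optimal component.  Returns the new value.
 */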
3136 int
3137 rf_set_autoconfig(raidPtr, new_value)
3138 RF_Raid_t *raidPtr;
3139 int new_value;
3140 {
3141 RF_ComponentLabel_t clabel;
3142 struct vnode *vp;
3143 dev_t dev;
3144 int row, column;
3145
3146 raidPtr->autoconfigure = new_value;
3147 for(row=0; row<raidPtr->numRow; row++) {
3148 for(column=0; column<raidPtr->numCol; column++) {
3149 if (raidPtr->Disks[row][column].status ==
3150 rf_ds_optimal) {
3151 dev = raidPtr->Disks[row][column].dev;
3152 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3153 raidread_component_label(dev, vp, &clabel);
3154 clabel.autoconfigure = new_value;
3155 raidwrite_component_label(dev, vp, &clabel);
3156 }
3157 }
3158 }
3159 return(new_value);
3160 }
3161
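/*
 * As rf_set_autoconfig(), but for the root_partition flag.
 */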
3162 int
3163 rf_set_rootpartition(raidPtr, new_value)
3164 RF_Raid_t *raidPtr;
3165 int new_value;
3166 {
3167 RF_ComponentLabel_t clabel;
3168 struct vnode *vp;
3169 dev_t dev;
3170 int row, column;
3171
3172 raidPtr->root_partition = new_value;
3173 for(row=0; row<raidPtr->numRow; row++) {
3174 for(column=0; column<raidPtr->numCol; column++) {
3175 if (raidPtr->Disks[row][column].status ==
3176 rf_ds_optimal) {
3177 dev = raidPtr->Disks[row][column].dev;
3178 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3179 raidread_component_label(dev, vp, &clabel);
3180 clabel.root_partition = new_value;
3181 raidwrite_component_label(dev, vp, &clabel);
3182 }
3183 }
3184 }
3185 return(new_value);
3186 }
3187
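/*
 * Close and release the vnodes of all components in a configuration set.
 */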
3188 void
3189 rf_release_all_vps(cset)
3190 RF_ConfigSet_t *cset;
3191 {
3192 RF_AutoConfig_t *ac;
3193
3194 ac = cset->ac;
3195 while(ac!=NULL) {
3196 /* Close the vp, and give it back */
3197 if (ac->vp) {
3198 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3199 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3200 vput(ac->vp);
3201 ac->vp = NULL;
3202 }
3203 ac = ac->next;
3204 }
3205 }
3206
3207
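/*
 * Free the component labels, the autoconfig entries, and finally the
 * configuration set itself.
 */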
3208 void
3209 rf_cleanup_config_set(cset)
3210 RF_ConfigSet_t *cset;
3211 {
3212 RF_AutoConfig_t *ac;
3213 RF_AutoConfig_t *next_ac;
3214
3215 ac = cset->ac;
3216 while(ac!=NULL) {
3217 next_ac = ac->next;
3218 /* nuke the label */
3219 free(ac->clabel, M_RAIDFRAME);
3220 /* cleanup the config structure */
3221 free(ac, M_RAIDFRAME);
3222 /* "next.." */
3223 ac = next_ac;
3224 }
3225 /* and, finally, nuke the config set */
3226 free(cset, M_RAIDFRAME);
3227 }
3228
3229
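/*
 * Initialize a component label from the current state of the array:
 * version, serial number, mod counter, geometry, layout parameters, and
 * the per-array flags (autoconfigure, root_partition, last unit, etc.).
 */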
3230 void
3231 raid_init_component_label(raidPtr, clabel)
3232 RF_Raid_t *raidPtr;
3233 RF_ComponentLabel_t *clabel;
3234 {
3235 /* current version number */
3236 clabel->version = RF_COMPONENT_LABEL_VERSION;
3237 clabel->serial_number = raidPtr->serial_number;
3238 clabel->mod_counter = raidPtr->mod_counter;
3239 clabel->num_rows = raidPtr->numRow;
3240 clabel->num_columns = raidPtr->numCol;
3241 clabel->clean = RF_RAID_DIRTY; /* not clean */
3242 clabel->status = rf_ds_optimal; /* "It's good!" */
3243
3244 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3245 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3246 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3247
3248 clabel->blockSize = raidPtr->bytesPerSector;
3249 clabel->numBlocks = raidPtr->sectorsPerDisk;
3250
3251 /* XXX not portable */
3252 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3253 clabel->maxOutstanding = raidPtr->maxOutstanding;
3254 clabel->autoconfigure = raidPtr->autoconfigure;
3255 clabel->root_partition = raidPtr->root_partition;
3256 clabel->last_unit = raidPtr->raidid;
3257 clabel->config_order = raidPtr->config_order;
3258 }
3259
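/*
 * Configure a RAID set found at autoconfiguration time: pick a unit
 * number (preferring the one it was last configured as), build an
 * RF_Config_t from the component labels, and bring the array up.
 * On success, *unit is set to the raid unit that was used.
 */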
3260 int
3261 rf_auto_config_set(cset,unit)
3262 RF_ConfigSet_t *cset;
3263 int *unit;
3264 {
3265 RF_Raid_t *raidPtr;
3266 RF_Config_t *config;
3267 int raidID;
3268 int retcode;
3269
3270 printf("RAID autoconfigure\n");
3271
3272 retcode = 0;
3273 *unit = -1;
3274
3275 /* 1. Create a config structure */
3276
3277 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3278 M_RAIDFRAME,
3279 M_NOWAIT);
3280 if (config==NULL) {
3281 printf("Out of mem!?!?\n");
3282 /* XXX do something more intelligent here. */
3283 return(1);
3284 }
3285
3286 memset(config, 0, sizeof(RF_Config_t));
3287
3288 /* XXX raidID needs to be set correctly.. */
3289
3290 /*
3291 2. Figure out what RAID ID this one is supposed to live at
3292 See if we can get the same RAID dev that it was configured
3293 on last time..
3294 */
3295
3296 raidID = cset->ac->clabel->last_unit;
3297 if ((raidID < 0) || (raidID >= numraid)) {
3298 /* let's not wander off into lala land. */
3299 raidID = numraid - 1;
3300 }
3301 if (raidPtrs[raidID]->valid != 0) {
3302
3303 /*
3304 Nope... Go looking for an alternative...
3305 Start high so we don't immediately use raid0 if that's
3306 not taken.
3307 */
3308
3309 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3310 if (raidPtrs[raidID]->valid == 0) {
3311 /* can use this one! */
3312 break;
3313 }
3314 }
3315 }
3316
3317 if (raidID < 0) {
3318 /* punt... */
3319 printf("Unable to auto configure this set!\n");
3320 printf("(Out of RAID devs!)\n");
3321 return(1);
3322 }
3323 printf("Configuring raid%d:\n",raidID);
3324 raidPtr = raidPtrs[raidID];
3325
3326 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3327 raidPtr->raidid = raidID;
3328 raidPtr->openings = RAIDOUTSTANDING;
3329
3330 /* 3. Build the configuration structure */
3331 rf_create_configuration(cset->ac, config, raidPtr);
3332
3333 /* 4. Do the configuration */
3334 retcode = rf_Configure(raidPtr, config, cset->ac);
3335
3336 if (retcode == 0) {
3337
3338 raidinit(raidPtrs[raidID]);
3339
3340 rf_markalldirty(raidPtrs[raidID]);
3341 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3342 if (cset->ac->clabel->root_partition==1) {
3343 /* everything configured just fine. Make a note
3344 that this set is eligible to be root. */
3345 cset->rootable = 1;
3346 /* XXX do this here? */
3347 raidPtrs[raidID]->root_partition = 1;
3348 }
3349 }
3350
3351 /* 5. Cleanup */
3352 free(config, M_RAIDFRAME);
3353
3354 *unit = raidID;
3355 return(retcode);
3356 }
3357
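/*
 * Account a completed access against the disk statistics of this raid
 * unit.
 */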
3358 void
3359 rf_disk_unbusy(desc)
3360 RF_RaidAccessDesc_t *desc;
3361 {
3362 struct buf *bp;
3363
3364 bp = (struct buf *)desc->bp;
3365 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3366 (bp->b_bcount - bp->b_resid));
3367 }
3368