/*	$NetBSD: rf_netbsdkintf.c,v 1.117.6.2 2002/05/30 14:47:03 gehenna Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.2 2002/05/30 14:47:03 gehenna Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_acctrace.h"
149 #include "rf_etimer.h"
150 #include "rf_general.h"
151 #include "rf_debugMem.h"
152 #include "rf_kintf.h"
153 #include "rf_options.h"
154 #include "rf_driver.h"
155 #include "rf_parityscan.h"
156 #include "rf_debugprint.h"
157 #include "rf_threadstuff.h"
158
159 int rf_kdebug_level = 0;
160
/* Wrapped in do/while so the macro expands to a single statement. */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a ; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit(RF_Raid_t *);
184
185 void raidattach(int);
186
187 dev_type_open(raidopen);
188 dev_type_close(raidclose);
189 dev_type_read(raidread);
190 dev_type_write(raidwrite);
191 dev_type_ioctl(raidioctl);
192 dev_type_strategy(raidstrategy);
193 dev_type_dump(raiddump);
194 dev_type_size(raidsize);
195
196 const struct bdevsw raid_bdevsw = {
197 raidopen, raidclose, raidstrategy, raidioctl,
198 raiddump, raidsize, D_DISK
199 };
200
201 const struct cdevsw raid_cdevsw = {
202 raidopen, raidclose, raidread, raidwrite, raidioctl,
203 nostop, notty, nopoll, nommap, D_DISK
204 };
205
206 /*
207 * Pilfered from ccd.c
208 */
209
210 struct raidbuf {
211 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
212 struct buf *rf_obp; /* ptr. to original I/O buf */
213 int rf_flags; /* misc. flags */
214 RF_DiskQueueData_t *req;/* the request that this was part of.. */
215 };
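/*
 * The embedded rf_buf is what actually gets handed to the component's
 * strategy routine.  Because it is the first member, KernelWakeupFunc()
 * can cast the completed struct buf back to a struct raidbuf and recover
 * both the original buf and the RAIDframe request it belongs to.
 */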
216
217 /* component buffer pool */
218 struct pool raidframe_cbufpool;
219
220 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
221 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
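/*
 * Note that pool_get() is called with PR_NOWAIT, so RAIDGETBUF() can
 * return NULL if the pool is empty and no memory is available.
 */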
222
223 /* XXX Not sure if the following should be replacing the raidPtrs above,
224 or if it should be used in conjunction with that...
225 */
226
227 struct raid_softc {
228 int sc_flags; /* flags */
229 int sc_cflags; /* configuration flags */
230 size_t sc_size; /* size of the raid device */
231 char sc_xname[20]; /* XXX external name */
232 struct disk sc_dkdev; /* generic disk device info */
233 struct buf_queue buf_queue; /* used for the device queue */
234 };
235 /* sc_flags */
236 #define RAIDF_INITED 0x01 /* unit has been initialized */
237 #define RAIDF_WLABEL 0x02 /* label area is writable */
238 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
239 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
240 #define RAIDF_LOCKED 0x80 /* unit is locked */
241
242 #define raidunit(x) DISKUNIT(x)
243 int numraid = 0;
244
245 /*
246 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
247 * Be aware that large numbers can allow the driver to consume a lot of
248 * kernel memory, especially on writes, and in degraded mode reads.
249 *
250 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
251 * a single 64K write will typically require 64K for the old data,
252 * 64K for the old parity, and 64K for the new parity, for a total
253 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
255 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
256 *
257 * Now in degraded mode, for example, a 64K read on the above setup may
258 * require data reconstruction, which will require *all* of the 4 remaining
259 * disks to participate -- 4 * 32K/disk == 128K again.
260 */
261
262 #ifndef RAIDOUTSTANDING
263 #define RAIDOUTSTANDING 6
264 #endif
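/*
 * Since the default above is guarded by #ifndef, a different value can
 * be supplied at build time (e.g. via a -DRAIDOUTSTANDING=N define or
 * an equivalent kernel option).
 */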
265
266 #define RAIDLABELDEV(dev) \
267 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
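/*
 * RAIDLABELDEV() maps any raid dev_t onto the raw partition of the same
 * unit; the disklabel is always read from and written to the raw
 * partition (see, for example, the DIOCWDINFO handling in raidioctl()).
 */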
268
269 /* declared here, and made public, for the benefit of KVM stuff.. */
270 struct raid_softc *raid_softc;
271
272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
273 struct disklabel *);
274 static void raidgetdisklabel(dev_t);
275 static void raidmakedisklabel(struct raid_softc *);
276
277 static int raidlock(struct raid_softc *);
278 static void raidunlock(struct raid_softc *);
279
280 static void rf_markalldirty(RF_Raid_t *);
281 void rf_mountroot_hook(struct device *);
282
283 struct device *raidrootdev;
284
285 void rf_ReconThread(struct rf_recon_req *);
286 /* XXX what I want is: */
287 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
288 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
289 void rf_CopybackThread(RF_Raid_t *raidPtr);
290 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
291 void rf_buildroothack(void *);
292
293 RF_AutoConfig_t *rf_find_raid_components(void);
294 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
295 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
296 static int rf_reasonable_label(RF_ComponentLabel_t *);
297 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
298 int rf_set_autoconfig(RF_Raid_t *, int);
299 int rf_set_rootpartition(RF_Raid_t *, int);
300 void rf_release_all_vps(RF_ConfigSet_t *);
301 void rf_cleanup_config_set(RF_ConfigSet_t *);
302 int rf_have_enough_components(RF_ConfigSet_t *);
303 int rf_auto_config_set(RF_ConfigSet_t *, int *);
304
305 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
306 allow autoconfig to take place.
307 Note that this is overridden by having
308 RAID_AUTOCONFIG as an option in the
309 kernel config file. */
310
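/*
 * raidattach() is the pseudo-device attach routine; "num" is the number
 * of units requested by the kernel configuration (e.g. a
 * "pseudo-device raid N" line).  It allocates the per-unit RAID
 * descriptors, softc array and fake root devices, and optionally starts
 * component auto-detection.
 */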
311 void
312 raidattach(num)
313 int num;
314 {
315 int raidID;
316 int i, rc;
317 RF_AutoConfig_t *ac_list; /* autoconfig list */
318 RF_ConfigSet_t *config_sets;
319
320 #ifdef DEBUG
321 printf("raidattach: Asked for %d units\n", num);
322 #endif
323
324 if (num <= 0) {
325 #ifdef DIAGNOSTIC
326 panic("raidattach: count <= 0");
327 #endif
328 return;
329 }
330 /* This is where all the initialization stuff gets done. */
331
332 numraid = num;
333
334 /* Make some space for requested number of units... */
335
336 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
337 if (raidPtrs == NULL) {
338 panic("raidPtrs is NULL!!\n");
339 }
340
341 /* Initialize the component buffer pool. */
342 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
343 0, 0, "raidpl", NULL);
344
345 rc = rf_mutex_init(&rf_sparet_wait_mutex);
346 if (rc) {
347 RF_PANIC();
348 }
349
350 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
351
352 for (i = 0; i < num; i++)
353 raidPtrs[i] = NULL;
354 rc = rf_BootRaidframe();
355 if (rc == 0)
356 printf("Kernelized RAIDframe activated\n");
357 else
358 panic("Serious error booting RAID!!\n");
359
	/* Put together some data structures like the CCD device does.  This
	 * lets us lock the device and what-not when it gets opened. */
362
363 raid_softc = (struct raid_softc *)
364 malloc(num * sizeof(struct raid_softc),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raid_softc == NULL) {
367 printf("WARNING: no memory for RAIDframe driver\n");
368 return;
369 }
370
371 memset(raid_softc, 0, num * sizeof(struct raid_softc));
372
373 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
374 M_RAIDFRAME, M_NOWAIT);
375 if (raidrootdev == NULL) {
376 panic("No memory for RAIDframe driver!!?!?!\n");
377 }
378
379 for (raidID = 0; raidID < num; raidID++) {
380 BUFQ_INIT(&raid_softc[raidID].buf_queue);
381
382 raidrootdev[raidID].dv_class = DV_DISK;
383 raidrootdev[raidID].dv_cfdata = NULL;
384 raidrootdev[raidID].dv_unit = raidID;
385 raidrootdev[raidID].dv_parent = NULL;
386 raidrootdev[raidID].dv_flags = 0;
387 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
388
389 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
390 (RF_Raid_t *));
391 if (raidPtrs[raidID] == NULL) {
392 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
393 numraid = raidID;
394 return;
395 }
396 }
397
398 #ifdef RAID_AUTOCONFIG
399 raidautoconfig = 1;
400 #endif
401
402 if (raidautoconfig) {
403 /* 1. locate all RAID components on the system */
404
405 #if DEBUG
406 printf("Searching for raid components...\n");
407 #endif
408 ac_list = rf_find_raid_components();
409
410 /* 2. sort them into their respective sets */
411
412 config_sets = rf_create_auto_sets(ac_list);
413
414 /* 3. evaluate each set and configure the valid ones
415 This gets done in rf_buildroothack() */
416
417 /* schedule the creation of the thread to do the
418 "/ on RAID" stuff */
419
420 kthread_create(rf_buildroothack,config_sets);
421
422 #if 0
423 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
424 #endif
425 }
426
427 }
428
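/*
 * rf_buildroothack() runs in its own kernel thread.  It walks the list
 * of auto-detected configuration sets, configures each set that has
 * enough components and is marked for auto-configuration, and releases
 * the resources held by the rest.  If exactly one configured set is
 * marked rootable, booted_device is pointed at the corresponding fake
 * raid device so that root can be mounted from it; with more than one
 * candidate, RB_ASKNAME is set and the user is asked.
 */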
429 void
430 rf_buildroothack(arg)
431 void *arg;
432 {
433 RF_ConfigSet_t *config_sets = arg;
434 RF_ConfigSet_t *cset;
435 RF_ConfigSet_t *next_cset;
436 int retcode;
437 int raidID;
438 int rootID;
439 int num_root;
440
441 rootID = 0;
442 num_root = 0;
443 cset = config_sets;
444 while(cset != NULL ) {
445 next_cset = cset->next;
446 if (rf_have_enough_components(cset) &&
447 cset->ac->clabel->autoconfigure==1) {
448 retcode = rf_auto_config_set(cset,&raidID);
449 if (!retcode) {
450 if (cset->rootable) {
451 rootID = raidID;
452 num_root++;
453 }
454 } else {
455 /* The autoconfig didn't work :( */
456 #if DEBUG
457 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
458 #endif
459 rf_release_all_vps(cset);
460 }
461 } else {
462 /* we're not autoconfiguring this set...
463 release the associated resources */
464 rf_release_all_vps(cset);
465 }
466 /* cleanup */
467 rf_cleanup_config_set(cset);
468 cset = next_cset;
469 }
470 if (boothowto & RB_ASKNAME) {
471 /* We don't auto-config... */
472 } else {
473 /* They didn't ask, and we found something bootable... */
474
475 if (num_root == 1) {
476 booted_device = &raidrootdev[rootID];
477 } else if (num_root > 1) {
478 /* we can't guess.. require the user to answer... */
479 boothowto |= RB_ASKNAME;
480 }
481 }
482 }
483
484
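/*
 * raidsize() returns the size, in DEV_BSIZE units, of the requested
 * partition, but only if that partition is of type FS_SWAP; -1 means
 * the unit or partition cannot be used.
 */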
485 int
486 raidsize(dev)
487 dev_t dev;
488 {
489 struct raid_softc *rs;
490 struct disklabel *lp;
491 int part, unit, omask, size;
492
493 unit = raidunit(dev);
494 if (unit >= numraid)
495 return (-1);
496 rs = &raid_softc[unit];
497
498 if ((rs->sc_flags & RAIDF_INITED) == 0)
499 return (-1);
500
501 part = DISKPART(dev);
502 omask = rs->sc_dkdev.dk_openmask & (1 << part);
503 lp = rs->sc_dkdev.dk_label;
504
505 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
506 return (-1);
507
508 if (lp->d_partitions[part].p_fstype != FS_SWAP)
509 size = -1;
510 else
511 size = lp->d_partitions[part].p_size *
512 (lp->d_secsize / DEV_BSIZE);
513
514 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
515 return (-1);
516
517 return (size);
518
519 }
520
521 int
522 raiddump(dev, blkno, va, size)
523 dev_t dev;
524 daddr_t blkno;
525 caddr_t va;
526 size_t size;
527 {
528 /* Not implemented. */
529 return ENXIO;
530 }
531 /* ARGSUSED */
532 int
533 raidopen(dev, flags, fmt, p)
534 dev_t dev;
535 int flags, fmt;
536 struct proc *p;
537 {
538 int unit = raidunit(dev);
539 struct raid_softc *rs;
540 struct disklabel *lp;
541 int part, pmask;
542 int error = 0;
543
544 if (unit >= numraid)
545 return (ENXIO);
546 rs = &raid_softc[unit];
547
548 if ((error = raidlock(rs)) != 0)
549 return (error);
550 lp = rs->sc_dkdev.dk_label;
551
552 part = DISKPART(dev);
553 pmask = (1 << part);
554
555 db1_printf(("Opening raid device number: %d partition: %d\n",
556 unit, part));
557
558
559 if ((rs->sc_flags & RAIDF_INITED) &&
560 (rs->sc_dkdev.dk_openmask == 0))
561 raidgetdisklabel(dev);
562
563 /* make sure that this partition exists */
564
565 if (part != RAW_PART) {
566 db1_printf(("Not a raw partition..\n"));
567 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
568 ((part >= lp->d_npartitions) ||
569 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
570 error = ENXIO;
571 raidunlock(rs);
572 db1_printf(("Bailing out...\n"));
573 return (error);
574 }
575 }
576 /* Prevent this unit from being unconfigured while open. */
577 switch (fmt) {
578 case S_IFCHR:
579 rs->sc_dkdev.dk_copenmask |= pmask;
580 break;
581
582 case S_IFBLK:
583 rs->sc_dkdev.dk_bopenmask |= pmask;
584 break;
585 }
586
587 if ((rs->sc_dkdev.dk_openmask == 0) &&
588 ((rs->sc_flags & RAIDF_INITED) != 0)) {
589 /* First one... mark things as dirty... Note that we *MUST*
590 have done a configure before this. I DO NOT WANT TO BE
591 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
592 THAT THEY BELONG TOGETHER!!!!! */
593 /* XXX should check to see if we're only open for reading
594 here... If so, we needn't do this, but then need some
595 other way of keeping track of what's happened.. */
596
597 rf_markalldirty( raidPtrs[unit] );
598 }
599
600
601 rs->sc_dkdev.dk_openmask =
602 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
603
604 raidunlock(rs);
605
606 return (error);
607
608
609 }
610 /* ARGSUSED */
611 int
612 raidclose(dev, flags, fmt, p)
613 dev_t dev;
614 int flags, fmt;
615 struct proc *p;
616 {
617 int unit = raidunit(dev);
618 struct raid_softc *rs;
619 int error = 0;
620 int part;
621
622 if (unit >= numraid)
623 return (ENXIO);
624 rs = &raid_softc[unit];
625
626 if ((error = raidlock(rs)) != 0)
627 return (error);
628
629 part = DISKPART(dev);
630
631 /* ...that much closer to allowing unconfiguration... */
632 switch (fmt) {
633 case S_IFCHR:
634 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
635 break;
636
637 case S_IFBLK:
638 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
639 break;
640 }
641 rs->sc_dkdev.dk_openmask =
642 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
643
644 if ((rs->sc_dkdev.dk_openmask == 0) &&
645 ((rs->sc_flags & RAIDF_INITED) != 0)) {
646 /* Last one... device is not unconfigured yet.
647 Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set;
		   mark things as clean... */
650 #if 0
651 printf("Last one on raid%d. Updating status.\n",unit);
652 #endif
653 rf_update_component_labels(raidPtrs[unit],
654 RF_FINAL_COMPONENT_UPDATE);
655 if (doing_shutdown) {
656 /* last one, and we're going down, so
657 lights out for this RAID set too. */
658 error = rf_Shutdown(raidPtrs[unit]);
659
660 /* It's no longer initialized... */
661 rs->sc_flags &= ~RAIDF_INITED;
662
663 /* Detach the disk. */
664 disk_detach(&rs->sc_dkdev);
665 }
666 }
667
668 raidunlock(rs);
669 return (0);
670
671 }
672
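/*
 * raidstrategy() is the entry point for block I/O on the raid device.
 * It validates the unit, bounds-checks the transfer against the
 * disklabel (except on the raw partition), queues the buf on the
 * per-unit buf_queue, and calls raidstart() to feed the request into
 * RAIDframe.  Failures are reported by setting B_ERROR and calling
 * biodone().
 */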
673 void
674 raidstrategy(bp)
675 struct buf *bp;
676 {
677 int s;
678
679 unsigned int raidID = raidunit(bp->b_dev);
680 RF_Raid_t *raidPtr;
681 struct raid_softc *rs = &raid_softc[raidID];
682 struct disklabel *lp;
683 int wlabel;
684
685 if ((rs->sc_flags & RAIDF_INITED) ==0) {
686 bp->b_error = ENXIO;
687 bp->b_flags |= B_ERROR;
688 bp->b_resid = bp->b_bcount;
689 biodone(bp);
690 return;
691 }
692 if (raidID >= numraid || !raidPtrs[raidID]) {
693 bp->b_error = ENODEV;
694 bp->b_flags |= B_ERROR;
695 bp->b_resid = bp->b_bcount;
696 biodone(bp);
697 return;
698 }
699 raidPtr = raidPtrs[raidID];
700 if (!raidPtr->valid) {
701 bp->b_error = ENODEV;
702 bp->b_flags |= B_ERROR;
703 bp->b_resid = bp->b_bcount;
704 biodone(bp);
705 return;
706 }
707 if (bp->b_bcount == 0) {
708 db1_printf(("b_bcount is zero..\n"));
709 biodone(bp);
710 return;
711 }
712 lp = rs->sc_dkdev.dk_label;
713
714 /*
715 * Do bounds checking and adjust transfer. If there's an
716 * error, the bounds check will flag that for us.
717 */
718
719 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
720 if (DISKPART(bp->b_dev) != RAW_PART)
721 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
722 db1_printf(("Bounds check failed!!:%d %d\n",
723 (int) bp->b_blkno, (int) wlabel));
724 biodone(bp);
725 return;
726 }
727 s = splbio();
728
729 bp->b_resid = 0;
730
731 /* stuff it onto our queue */
732 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
733
734 raidstart(raidPtrs[raidID]);
735
736 splx(s);
737 }
738 /* ARGSUSED */
739 int
740 raidread(dev, uio, flags)
741 dev_t dev;
742 struct uio *uio;
743 int flags;
744 {
745 int unit = raidunit(dev);
746 struct raid_softc *rs;
747 int part;
748
749 if (unit >= numraid)
750 return (ENXIO);
751 rs = &raid_softc[unit];
752
753 if ((rs->sc_flags & RAIDF_INITED) == 0)
754 return (ENXIO);
755 part = DISKPART(dev);
756
757 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
758
759 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
760
761 }
762 /* ARGSUSED */
763 int
764 raidwrite(dev, uio, flags)
765 dev_t dev;
766 struct uio *uio;
767 int flags;
768 {
769 int unit = raidunit(dev);
770 struct raid_softc *rs;
771
772 if (unit >= numraid)
773 return (ENXIO);
774 rs = &raid_softc[unit];
775
776 if ((rs->sc_flags & RAIDF_INITED) == 0)
777 return (ENXIO);
778 db1_printf(("raidwrite\n"));
779 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
780
781 }
782
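/*
 * raidioctl() handles both the RAIDframe-specific ioctls (configure,
 * shutdown, rebuild, status queries, component label manipulation, ...)
 * and the standard disk ioctls for disklabel handling.  Long-running
 * operations (parity rewrite, reconstruction, copyback) are handed off
 * to kernel threads so the ioctl can return without waiting for them.
 */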
783 int
784 raidioctl(dev, cmd, data, flag, p)
785 dev_t dev;
786 u_long cmd;
787 caddr_t data;
788 int flag;
789 struct proc *p;
790 {
791 int unit = raidunit(dev);
792 int error = 0;
793 int part, pmask;
794 struct raid_softc *rs;
795 RF_Config_t *k_cfg, *u_cfg;
796 RF_Raid_t *raidPtr;
797 RF_RaidDisk_t *diskPtr;
798 RF_AccTotals_t *totals;
799 RF_DeviceConfig_t *d_cfg, **ucfgp;
800 u_char *specific_buf;
801 int retcode = 0;
802 int row;
803 int column;
804 struct rf_recon_req *rrcopy, *rr;
805 RF_ComponentLabel_t *clabel;
806 RF_ComponentLabel_t ci_label;
807 RF_ComponentLabel_t **clabel_ptr;
808 RF_SingleComponent_t *sparePtr,*componentPtr;
809 RF_SingleComponent_t hot_spare;
810 RF_SingleComponent_t component;
811 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
812 int i, j, d;
813 #ifdef __HAVE_OLD_DISKLABEL
814 struct disklabel newlabel;
815 #endif
816
817 if (unit >= numraid)
818 return (ENXIO);
819 rs = &raid_softc[unit];
820 raidPtr = raidPtrs[unit];
821
822 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
823 (int) DISKPART(dev), (int) unit, (int) cmd));
824
825 /* Must be open for writes for these commands... */
826 switch (cmd) {
827 case DIOCSDINFO:
828 case DIOCWDINFO:
829 #ifdef __HAVE_OLD_DISKLABEL
830 case ODIOCWDINFO:
831 case ODIOCSDINFO:
832 #endif
833 case DIOCWLABEL:
834 if ((flag & FWRITE) == 0)
835 return (EBADF);
836 }
837
838 /* Must be initialized for these... */
839 switch (cmd) {
840 case DIOCGDINFO:
841 case DIOCSDINFO:
842 case DIOCWDINFO:
843 #ifdef __HAVE_OLD_DISKLABEL
844 case ODIOCGDINFO:
845 case ODIOCWDINFO:
846 case ODIOCSDINFO:
847 case ODIOCGDEFLABEL:
848 #endif
849 case DIOCGPART:
850 case DIOCWLABEL:
851 case DIOCGDEFLABEL:
852 case RAIDFRAME_SHUTDOWN:
853 case RAIDFRAME_REWRITEPARITY:
854 case RAIDFRAME_GET_INFO:
855 case RAIDFRAME_RESET_ACCTOTALS:
856 case RAIDFRAME_GET_ACCTOTALS:
857 case RAIDFRAME_KEEP_ACCTOTALS:
858 case RAIDFRAME_GET_SIZE:
859 case RAIDFRAME_FAIL_DISK:
860 case RAIDFRAME_COPYBACK:
861 case RAIDFRAME_CHECK_RECON_STATUS:
862 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
863 case RAIDFRAME_GET_COMPONENT_LABEL:
864 case RAIDFRAME_SET_COMPONENT_LABEL:
865 case RAIDFRAME_ADD_HOT_SPARE:
866 case RAIDFRAME_REMOVE_HOT_SPARE:
867 case RAIDFRAME_INIT_LABELS:
868 case RAIDFRAME_REBUILD_IN_PLACE:
869 case RAIDFRAME_CHECK_PARITY:
870 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
871 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
872 case RAIDFRAME_CHECK_COPYBACK_STATUS:
873 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
874 case RAIDFRAME_SET_AUTOCONFIG:
875 case RAIDFRAME_SET_ROOT:
876 case RAIDFRAME_DELETE_COMPONENT:
877 case RAIDFRAME_INCORPORATE_HOT_SPARE:
878 if ((rs->sc_flags & RAIDF_INITED) == 0)
879 return (ENXIO);
880 }
881
882 switch (cmd) {
883
884 /* configure the system */
885 case RAIDFRAME_CONFIGURE:
886
887 if (raidPtr->valid) {
888 /* There is a valid RAID set running on this unit! */
889 printf("raid%d: Device already configured!\n",unit);
890 return(EINVAL);
891 }
892
893 /* copy-in the configuration information */
894 /* data points to a pointer to the configuration structure */
895
896 u_cfg = *((RF_Config_t **) data);
897 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
898 if (k_cfg == NULL) {
899 return (ENOMEM);
900 }
901 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
902 sizeof(RF_Config_t));
903 if (retcode) {
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
906 retcode));
907 return (retcode);
908 }
909 /* allocate a buffer for the layout-specific data, and copy it
910 * in */
911 if (k_cfg->layoutSpecificSize) {
912 if (k_cfg->layoutSpecificSize > 10000) {
913 /* sanity check */
914 RF_Free(k_cfg, sizeof(RF_Config_t));
915 return (EINVAL);
916 }
917 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
918 (u_char *));
919 if (specific_buf == NULL) {
920 RF_Free(k_cfg, sizeof(RF_Config_t));
921 return (ENOMEM);
922 }
923 retcode = copyin(k_cfg->layoutSpecific,
924 (caddr_t) specific_buf,
925 k_cfg->layoutSpecificSize);
926 if (retcode) {
927 RF_Free(k_cfg, sizeof(RF_Config_t));
928 RF_Free(specific_buf,
929 k_cfg->layoutSpecificSize);
930 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
931 retcode));
932 return (retcode);
933 }
934 } else
935 specific_buf = NULL;
936 k_cfg->layoutSpecific = specific_buf;
937
938 /* should do some kind of sanity check on the configuration.
939 * Store the sum of all the bytes in the last byte? */
940
941 /* configure the system */
942
943 /*
944 * Clear the entire RAID descriptor, just to make sure
945 * there is no stale data left in the case of a
946 * reconfiguration
947 */
948 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
949 raidPtr->raidid = unit;
950
951 retcode = rf_Configure(raidPtr, k_cfg, NULL);
952
953 if (retcode == 0) {
954
955 /* allow this many simultaneous IO's to
956 this RAID device */
957 raidPtr->openings = RAIDOUTSTANDING;
958
959 raidinit(raidPtr);
960 rf_markalldirty(raidPtr);
961 }
962 /* free the buffers. No return code here. */
963 if (k_cfg->layoutSpecificSize) {
964 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
965 }
966 RF_Free(k_cfg, sizeof(RF_Config_t));
967
968 return (retcode);
969
970 /* shutdown the system */
971 case RAIDFRAME_SHUTDOWN:
972
973 if ((error = raidlock(rs)) != 0)
974 return (error);
975
976 /*
977 * If somebody has a partition mounted, we shouldn't
978 * shutdown.
979 */
980
981 part = DISKPART(dev);
982 pmask = (1 << part);
983 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
984 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
985 (rs->sc_dkdev.dk_copenmask & pmask))) {
986 raidunlock(rs);
987 return (EBUSY);
988 }
989
990 retcode = rf_Shutdown(raidPtr);
991
992 /* It's no longer initialized... */
993 rs->sc_flags &= ~RAIDF_INITED;
994
995 /* Detach the disk. */
996 disk_detach(&rs->sc_dkdev);
997
998 raidunlock(rs);
999
1000 return (retcode);
1001 case RAIDFRAME_GET_COMPONENT_LABEL:
1002 clabel_ptr = (RF_ComponentLabel_t **) data;
1003 /* need to read the component label for the disk indicated
1004 by row,column in clabel */
1005
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
1008 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1009 (RF_ComponentLabel_t *));
1010 if (clabel == NULL)
1011 return (ENOMEM);
1012
1013 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1014
1015 retcode = copyin( *clabel_ptr, clabel,
1016 sizeof(RF_ComponentLabel_t));
1017
1018 if (retcode) {
1019 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1020 return(retcode);
1021 }
1022
1023 row = clabel->row;
1024 column = clabel->column;
1025
1026 if ((row < 0) || (row >= raidPtr->numRow) ||
1027 (column < 0) || (column >= raidPtr->numCol +
1028 raidPtr->numSpare)) {
1029 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1030 return(EINVAL);
1031 }
1032
1033 raidread_component_label(raidPtr->Disks[row][column].dev,
1034 raidPtr->raid_cinfo[row][column].ci_vp,
1035 clabel );
1036
1037 retcode = copyout((caddr_t) clabel,
1038 (caddr_t) *clabel_ptr,
1039 sizeof(RF_ComponentLabel_t));
1040 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1041 return (retcode);
1042
1043 case RAIDFRAME_SET_COMPONENT_LABEL:
1044 clabel = (RF_ComponentLabel_t *) data;
1045
1046 /* XXX check the label for valid stuff... */
1047 /* Note that some things *should not* get modified --
1048 the user should be re-initing the labels instead of
1049 trying to patch things.
1050 */
1051
1052 printf("Got component label:\n");
1053 printf("Version: %d\n",clabel->version);
1054 printf("Serial Number: %d\n",clabel->serial_number);
1055 printf("Mod counter: %d\n",clabel->mod_counter);
1056 printf("Row: %d\n", clabel->row);
1057 printf("Column: %d\n", clabel->column);
1058 printf("Num Rows: %d\n", clabel->num_rows);
1059 printf("Num Columns: %d\n", clabel->num_columns);
1060 printf("Clean: %d\n", clabel->clean);
1061 printf("Status: %d\n", clabel->status);
1062
1063 row = clabel->row;
1064 column = clabel->column;
1065
1066 if ((row < 0) || (row >= raidPtr->numRow) ||
1067 (column < 0) || (column >= raidPtr->numCol)) {
1068 return(EINVAL);
1069 }
1070
1071 /* XXX this isn't allowed to do anything for now :-) */
1072
1073 /* XXX and before it is, we need to fill in the rest
1074 of the fields!?!?!?! */
1075 #if 0
1076 raidwrite_component_label(
1077 raidPtr->Disks[row][column].dev,
1078 raidPtr->raid_cinfo[row][column].ci_vp,
1079 clabel );
1080 #endif
1081 return (0);
1082
1083 case RAIDFRAME_INIT_LABELS:
1084 clabel = (RF_ComponentLabel_t *) data;
1085 /*
1086 we only want the serial number from
1087 the above. We get all the rest of the information
1088 from the config that was used to create this RAID
1089 set.
1090 */
1091
1092 raidPtr->serial_number = clabel->serial_number;
1093
1094 raid_init_component_label(raidPtr, &ci_label);
1095 ci_label.serial_number = clabel->serial_number;
1096
1097 for(row=0;row<raidPtr->numRow;row++) {
1098 ci_label.row = row;
1099 for(column=0;column<raidPtr->numCol;column++) {
1100 diskPtr = &raidPtr->Disks[row][column];
1101 if (!RF_DEAD_DISK(diskPtr->status)) {
1102 ci_label.partitionSize = diskPtr->partitionSize;
1103 ci_label.column = column;
1104 raidwrite_component_label(
1105 raidPtr->Disks[row][column].dev,
1106 raidPtr->raid_cinfo[row][column].ci_vp,
1107 &ci_label );
1108 }
1109 }
1110 }
1111
1112 return (retcode);
1113 case RAIDFRAME_SET_AUTOCONFIG:
1114 d = rf_set_autoconfig(raidPtr, *(int *) data);
1115 printf("New autoconfig value is: %d\n", d);
1116 *(int *) data = d;
1117 return (retcode);
1118
1119 case RAIDFRAME_SET_ROOT:
1120 d = rf_set_rootpartition(raidPtr, *(int *) data);
1121 printf("New rootpartition value is: %d\n", d);
1122 *(int *) data = d;
1123 return (retcode);
1124
1125 /* initialize all parity */
1126 case RAIDFRAME_REWRITEPARITY:
1127
1128 if (raidPtr->Layout.map->faultsTolerated == 0) {
1129 /* Parity for RAID 0 is trivially correct */
1130 raidPtr->parity_good = RF_RAID_CLEAN;
1131 return(0);
1132 }
1133
1134 if (raidPtr->parity_rewrite_in_progress == 1) {
1135 /* Re-write is already in progress! */
1136 return(EINVAL);
1137 }
1138
1139 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1140 rf_RewriteParityThread,
1141 raidPtr,"raid_parity");
1142 return (retcode);
1143
1144
1145 case RAIDFRAME_ADD_HOT_SPARE:
1146 sparePtr = (RF_SingleComponent_t *) data;
1147 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1148 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1149 return(retcode);
1150
1151 case RAIDFRAME_REMOVE_HOT_SPARE:
1152 return(retcode);
1153
1154 case RAIDFRAME_DELETE_COMPONENT:
1155 componentPtr = (RF_SingleComponent_t *)data;
1156 memcpy( &component, componentPtr,
1157 sizeof(RF_SingleComponent_t));
1158 retcode = rf_delete_component(raidPtr, &component);
1159 return(retcode);
1160
1161 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1162 componentPtr = (RF_SingleComponent_t *)data;
1163 memcpy( &component, componentPtr,
1164 sizeof(RF_SingleComponent_t));
1165 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1166 return(retcode);
1167
1168 case RAIDFRAME_REBUILD_IN_PLACE:
1169
1170 if (raidPtr->Layout.map->faultsTolerated == 0) {
1171 /* Can't do this on a RAID 0!! */
1172 return(EINVAL);
1173 }
1174
1175 if (raidPtr->recon_in_progress == 1) {
1176 /* a reconstruct is already in progress! */
1177 return(EINVAL);
1178 }
1179
1180 componentPtr = (RF_SingleComponent_t *) data;
1181 memcpy( &component, componentPtr,
1182 sizeof(RF_SingleComponent_t));
1183 row = component.row;
1184 column = component.column;
1185 printf("Rebuild: %d %d\n",row, column);
1186 if ((row < 0) || (row >= raidPtr->numRow) ||
1187 (column < 0) || (column >= raidPtr->numCol)) {
1188 return(EINVAL);
1189 }
1190
1191 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1192 if (rrcopy == NULL)
1193 return(ENOMEM);
1194
1195 rrcopy->raidPtr = (void *) raidPtr;
1196 rrcopy->row = row;
1197 rrcopy->col = column;
1198
1199 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1200 rf_ReconstructInPlaceThread,
1201 rrcopy,"raid_reconip");
1202 return(retcode);
1203
1204 case RAIDFRAME_GET_INFO:
1205 if (!raidPtr->valid)
1206 return (ENODEV);
1207 ucfgp = (RF_DeviceConfig_t **) data;
1208 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1209 (RF_DeviceConfig_t *));
1210 if (d_cfg == NULL)
1211 return (ENOMEM);
1212 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1213 d_cfg->rows = raidPtr->numRow;
1214 d_cfg->cols = raidPtr->numCol;
1215 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1216 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1217 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1218 return (ENOMEM);
1219 }
1220 d_cfg->nspares = raidPtr->numSpare;
1221 if (d_cfg->nspares >= RF_MAX_DISKS) {
1222 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1223 return (ENOMEM);
1224 }
1225 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1226 d = 0;
1227 for (i = 0; i < d_cfg->rows; i++) {
1228 for (j = 0; j < d_cfg->cols; j++) {
1229 d_cfg->devs[d] = raidPtr->Disks[i][j];
1230 d++;
1231 }
1232 }
1233 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1234 d_cfg->spares[i] = raidPtr->Disks[0][j];
1235 }
1236 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1237 sizeof(RF_DeviceConfig_t));
1238 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1239
1240 return (retcode);
1241
1242 case RAIDFRAME_CHECK_PARITY:
1243 *(int *) data = raidPtr->parity_good;
1244 return (0);
1245
1246 case RAIDFRAME_RESET_ACCTOTALS:
1247 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1248 return (0);
1249
1250 case RAIDFRAME_GET_ACCTOTALS:
1251 totals = (RF_AccTotals_t *) data;
1252 *totals = raidPtr->acc_totals;
1253 return (0);
1254
1255 case RAIDFRAME_KEEP_ACCTOTALS:
1256 raidPtr->keep_acc_totals = *(int *)data;
1257 return (0);
1258
1259 case RAIDFRAME_GET_SIZE:
1260 *(int *) data = raidPtr->totalSectors;
1261 return (0);
1262
1263 /* fail a disk & optionally start reconstruction */
1264 case RAIDFRAME_FAIL_DISK:
1265
1266 if (raidPtr->Layout.map->faultsTolerated == 0) {
1267 /* Can't do this on a RAID 0!! */
1268 return(EINVAL);
1269 }
1270
1271 rr = (struct rf_recon_req *) data;
1272
1273 if (rr->row < 0 || rr->row >= raidPtr->numRow
1274 || rr->col < 0 || rr->col >= raidPtr->numCol)
1275 return (EINVAL);
1276
1277 printf("raid%d: Failing the disk: row: %d col: %d\n",
1278 unit, rr->row, rr->col);
1279
1280 /* make a copy of the recon request so that we don't rely on
1281 * the user's buffer */
1282 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1283 if (rrcopy == NULL)
1284 return(ENOMEM);
1285 memcpy(rrcopy, rr, sizeof(*rr));
1286 rrcopy->raidPtr = (void *) raidPtr;
1287
1288 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1289 rf_ReconThread,
1290 rrcopy,"raid_recon");
1291 return (0);
1292
1293 /* invoke a copyback operation after recon on whatever disk
1294 * needs it, if any */
1295 case RAIDFRAME_COPYBACK:
1296
1297 if (raidPtr->Layout.map->faultsTolerated == 0) {
1298 /* This makes no sense on a RAID 0!! */
1299 return(EINVAL);
1300 }
1301
1302 if (raidPtr->copyback_in_progress == 1) {
1303 /* Copyback is already in progress! */
1304 return(EINVAL);
1305 }
1306
1307 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1308 rf_CopybackThread,
1309 raidPtr,"raid_copyback");
1310 return (retcode);
1311
1312 /* return the percentage completion of reconstruction */
1313 case RAIDFRAME_CHECK_RECON_STATUS:
1314 if (raidPtr->Layout.map->faultsTolerated == 0) {
1315 /* This makes no sense on a RAID 0, so tell the
1316 user it's done. */
1317 *(int *) data = 100;
1318 return(0);
1319 }
1320 row = 0; /* XXX we only consider a single row... */
1321 if (raidPtr->status[row] != rf_rs_reconstructing)
1322 *(int *) data = 100;
1323 else
1324 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1325 return (0);
1326 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1327 progressInfoPtr = (RF_ProgressInfo_t **) data;
1328 row = 0; /* XXX we only consider a single row... */
1329 if (raidPtr->status[row] != rf_rs_reconstructing) {
1330 progressInfo.remaining = 0;
1331 progressInfo.completed = 100;
1332 progressInfo.total = 100;
1333 } else {
1334 progressInfo.total =
1335 raidPtr->reconControl[row]->numRUsTotal;
1336 progressInfo.completed =
1337 raidPtr->reconControl[row]->numRUsComplete;
1338 progressInfo.remaining = progressInfo.total -
1339 progressInfo.completed;
1340 }
1341 retcode = copyout((caddr_t) &progressInfo,
1342 (caddr_t) *progressInfoPtr,
1343 sizeof(RF_ProgressInfo_t));
1344 return (retcode);
1345
1346 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1347 if (raidPtr->Layout.map->faultsTolerated == 0) {
1348 /* This makes no sense on a RAID 0, so tell the
1349 user it's done. */
1350 *(int *) data = 100;
1351 return(0);
1352 }
1353 if (raidPtr->parity_rewrite_in_progress == 1) {
1354 *(int *) data = 100 *
1355 raidPtr->parity_rewrite_stripes_done /
1356 raidPtr->Layout.numStripe;
1357 } else {
1358 *(int *) data = 100;
1359 }
1360 return (0);
1361
1362 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1363 progressInfoPtr = (RF_ProgressInfo_t **) data;
1364 if (raidPtr->parity_rewrite_in_progress == 1) {
1365 progressInfo.total = raidPtr->Layout.numStripe;
1366 progressInfo.completed =
1367 raidPtr->parity_rewrite_stripes_done;
1368 progressInfo.remaining = progressInfo.total -
1369 progressInfo.completed;
1370 } else {
1371 progressInfo.remaining = 0;
1372 progressInfo.completed = 100;
1373 progressInfo.total = 100;
1374 }
1375 retcode = copyout((caddr_t) &progressInfo,
1376 (caddr_t) *progressInfoPtr,
1377 sizeof(RF_ProgressInfo_t));
1378 return (retcode);
1379
1380 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1381 if (raidPtr->Layout.map->faultsTolerated == 0) {
1382 /* This makes no sense on a RAID 0 */
1383 *(int *) data = 100;
1384 return(0);
1385 }
1386 if (raidPtr->copyback_in_progress == 1) {
1387 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1388 raidPtr->Layout.numStripe;
1389 } else {
1390 *(int *) data = 100;
1391 }
1392 return (0);
1393
1394 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1395 progressInfoPtr = (RF_ProgressInfo_t **) data;
1396 if (raidPtr->copyback_in_progress == 1) {
1397 progressInfo.total = raidPtr->Layout.numStripe;
1398 progressInfo.completed =
1399 raidPtr->copyback_stripes_done;
1400 progressInfo.remaining = progressInfo.total -
1401 progressInfo.completed;
1402 } else {
1403 progressInfo.remaining = 0;
1404 progressInfo.completed = 100;
1405 progressInfo.total = 100;
1406 }
1407 retcode = copyout((caddr_t) &progressInfo,
1408 (caddr_t) *progressInfoPtr,
1409 sizeof(RF_ProgressInfo_t));
1410 return (retcode);
1411
1412 /* the sparetable daemon calls this to wait for the kernel to
1413 * need a spare table. this ioctl does not return until a
1414 * spare table is needed. XXX -- calling mpsleep here in the
1415 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1416 * -- I should either compute the spare table in the kernel,
1417 * or have a different -- XXX XXX -- interface (a different
1418 * character device) for delivering the table -- XXX */
1419 #if 0
1420 case RAIDFRAME_SPARET_WAIT:
1421 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1422 while (!rf_sparet_wait_queue)
1423 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1424 waitreq = rf_sparet_wait_queue;
1425 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1426 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1427
1428 /* structure assignment */
1429 *((RF_SparetWait_t *) data) = *waitreq;
1430
1431 RF_Free(waitreq, sizeof(*waitreq));
1432 return (0);
1433
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1436 case RAIDFRAME_ABORT_SPARET_WAIT:
1437 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1438 waitreq->fcol = -1;
1439 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1440 waitreq->next = rf_sparet_wait_queue;
1441 rf_sparet_wait_queue = waitreq;
1442 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1443 wakeup(&rf_sparet_wait_queue);
1444 return (0);
1445
1446 /* used by the spare table daemon to deliver a spare table
1447 * into the kernel */
1448 case RAIDFRAME_SEND_SPARET:
1449
1450 /* install the spare table */
1451 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1452
1453 /* respond to the requestor. the return status of the spare
1454 * table installation is passed in the "fcol" field */
1455 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1456 waitreq->fcol = retcode;
1457 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1458 waitreq->next = rf_sparet_resp_queue;
1459 rf_sparet_resp_queue = waitreq;
1460 wakeup(&rf_sparet_resp_queue);
1461 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1462
1463 return (retcode);
1464 #endif
1465
1466 default:
1467 break; /* fall through to the os-specific code below */
1468
1469 }
1470
1471 if (!raidPtr->valid)
1472 return (EINVAL);
1473
1474 /*
1475 * Add support for "regular" device ioctls here.
1476 */
1477
1478 switch (cmd) {
1479 case DIOCGDINFO:
1480 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1481 break;
1482 #ifdef __HAVE_OLD_DISKLABEL
1483 case ODIOCGDINFO:
1484 newlabel = *(rs->sc_dkdev.dk_label);
1485 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1486 return ENOTTY;
1487 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1488 break;
1489 #endif
1490
1491 case DIOCGPART:
1492 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1493 ((struct partinfo *) data)->part =
1494 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1495 break;
1496
1497 case DIOCWDINFO:
1498 case DIOCSDINFO:
1499 #ifdef __HAVE_OLD_DISKLABEL
1500 case ODIOCWDINFO:
1501 case ODIOCSDINFO:
1502 #endif
1503 {
1504 struct disklabel *lp;
1505 #ifdef __HAVE_OLD_DISKLABEL
1506 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1507 memset(&newlabel, 0, sizeof newlabel);
1508 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1509 lp = &newlabel;
1510 } else
1511 #endif
1512 lp = (struct disklabel *)data;
1513
1514 if ((error = raidlock(rs)) != 0)
1515 return (error);
1516
1517 rs->sc_flags |= RAIDF_LABELLING;
1518
1519 error = setdisklabel(rs->sc_dkdev.dk_label,
1520 lp, 0, rs->sc_dkdev.dk_cpulabel);
1521 if (error == 0) {
1522 if (cmd == DIOCWDINFO
1523 #ifdef __HAVE_OLD_DISKLABEL
1524 || cmd == ODIOCWDINFO
1525 #endif
1526 )
1527 error = writedisklabel(RAIDLABELDEV(dev),
1528 raidstrategy, rs->sc_dkdev.dk_label,
1529 rs->sc_dkdev.dk_cpulabel);
1530 }
1531 rs->sc_flags &= ~RAIDF_LABELLING;
1532
1533 raidunlock(rs);
1534
1535 if (error)
1536 return (error);
1537 break;
1538 }
1539
1540 case DIOCWLABEL:
1541 if (*(int *) data != 0)
1542 rs->sc_flags |= RAIDF_WLABEL;
1543 else
1544 rs->sc_flags &= ~RAIDF_WLABEL;
1545 break;
1546
1547 case DIOCGDEFLABEL:
1548 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1549 break;
1550
1551 #ifdef __HAVE_OLD_DISKLABEL
1552 case ODIOCGDEFLABEL:
1553 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1554 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1555 return ENOTTY;
1556 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1557 break;
1558 #endif
1559
1560 default:
1561 retcode = ENOTTY;
1562 }
1563 return (retcode);
1564
1565 }
1566
1567
1568 /* raidinit -- complete the rest of the initialization for the
1569 RAIDframe device. */
1570
1571
1572 static void
1573 raidinit(raidPtr)
1574 RF_Raid_t *raidPtr;
1575 {
1576 struct raid_softc *rs;
1577 int unit;
1578
1579 unit = raidPtr->raidid;
1580
1581 rs = &raid_softc[unit];
1582
1583 /* XXX should check return code first... */
1584 rs->sc_flags |= RAIDF_INITED;
1585
1586 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1587
1588 rs->sc_dkdev.dk_name = rs->sc_xname;
1589
1590 /* disk_attach actually creates space for the CPU disklabel, among
1591 * other things, so it's critical to call this *BEFORE* we try putzing
1592 * with disklabels. */
1593
1594 disk_attach(&rs->sc_dkdev);
1595
1596 /* XXX There may be a weird interaction here between this, and
1597 * protectedSectors, as used in RAIDframe. */
1598
1599 rs->sc_size = raidPtr->totalSectors;
1600
1601 }
1602
1603 /* wake up the daemon & tell it to get us a spare table
1604 * XXX
1605 * the entries in the queues should be tagged with the raidPtr
1606 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1608 * XXX
1609 *
1610 * XXX This code is not currently used. GO
1611 */
1612 int
1613 rf_GetSpareTableFromDaemon(req)
1614 RF_SparetWait_t *req;
1615 {
1616 int retcode;
1617
1618 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1619 req->next = rf_sparet_wait_queue;
1620 rf_sparet_wait_queue = req;
1621 wakeup(&rf_sparet_wait_queue);
1622
1623 /* mpsleep unlocks the mutex */
1624 while (!rf_sparet_resp_queue) {
1625 tsleep(&rf_sparet_resp_queue, PRIBIO,
1626 "raidframe getsparetable", 0);
1627 }
1628 req = rf_sparet_resp_queue;
1629 rf_sparet_resp_queue = req->next;
1630 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1631
1632 retcode = req->fcol;
1633 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1634 * alloc'd */
1635 return (retcode);
1636 }
1637
1638 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1639 * bp & passes it down.
1640 * any calls originating in the kernel must use non-blocking I/O
1641 * do some extra sanity checking to return "appropriate" error values for
1642 * certain conditions (to make some standard utilities work)
1643 *
1644 * Formerly known as: rf_DoAccessKernel
1645 */
1646 void
1647 raidstart(raidPtr)
1648 RF_Raid_t *raidPtr;
1649 {
1650 RF_SectorCount_t num_blocks, pb, sum;
1651 RF_RaidAddr_t raid_addr;
1652 int retcode;
1653 struct partition *pp;
1654 daddr_t blocknum;
1655 int unit;
1656 struct raid_softc *rs;
1657 int do_async;
1658 struct buf *bp;
1659
1660 unit = raidPtr->raidid;
1661 rs = &raid_softc[unit];
1662
1663 /* quick check to see if anything has died recently */
1664 RF_LOCK_MUTEX(raidPtr->mutex);
1665 if (raidPtr->numNewFailures > 0) {
1666 rf_update_component_labels(raidPtr,
1667 RF_NORMAL_COMPONENT_UPDATE);
1668 raidPtr->numNewFailures--;
1669 }
1670 RF_UNLOCK_MUTEX(raidPtr->mutex);
1671
1672 /* Check to see if we're at the limit... */
1673 RF_LOCK_MUTEX(raidPtr->mutex);
1674 while (raidPtr->openings > 0) {
1675 RF_UNLOCK_MUTEX(raidPtr->mutex);
1676
1677 /* get the next item, if any, from the queue */
1678 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1679 /* nothing more to do */
1680 return;
1681 }
1682 BUFQ_REMOVE(&rs->buf_queue, bp);
1683
1684 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1685 * partition.. Need to make it absolute to the underlying
1686 * device.. */
1687
1688 blocknum = bp->b_blkno;
1689 if (DISKPART(bp->b_dev) != RAW_PART) {
1690 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1691 blocknum += pp->p_offset;
1692 }
1693
1694 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1695 (int) blocknum));
1696
1697 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1698 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1699
1700 /* *THIS* is where we adjust what block we're going to...
1701 * but DO NOT TOUCH bp->b_blkno!!! */
1702 raid_addr = blocknum;
1703
1704 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1705 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1706 sum = raid_addr + num_blocks + pb;
1707 if (1 || rf_debugKernelAccess) {
1708 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1709 (int) raid_addr, (int) sum, (int) num_blocks,
1710 (int) pb, (int) bp->b_resid));
1711 }
1712 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1713 || (sum < num_blocks) || (sum < pb)) {
1714 bp->b_error = ENOSPC;
1715 bp->b_flags |= B_ERROR;
1716 bp->b_resid = bp->b_bcount;
1717 biodone(bp);
1718 RF_LOCK_MUTEX(raidPtr->mutex);
1719 continue;
1720 }
1721 /*
1722 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1723 */
1724
1725 if (bp->b_bcount & raidPtr->sectorMask) {
1726 bp->b_error = EINVAL;
1727 bp->b_flags |= B_ERROR;
1728 bp->b_resid = bp->b_bcount;
1729 biodone(bp);
1730 RF_LOCK_MUTEX(raidPtr->mutex);
1731 continue;
1732
1733 }
1734 db1_printf(("Calling DoAccess..\n"));
1735
1736
1737 RF_LOCK_MUTEX(raidPtr->mutex);
1738 raidPtr->openings--;
1739 RF_UNLOCK_MUTEX(raidPtr->mutex);
1740
1741 /*
1742 * Everything is async.
1743 */
1744 do_async = 1;
1745
1746 disk_busy(&rs->sc_dkdev);
1747
1748 /* XXX we're still at splbio() here... do we *really*
1749 need to be? */
1750
1751 /* don't ever condition on bp->b_flags & B_WRITE.
1752 * always condition on B_READ instead */
1753
1754 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1755 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1756 do_async, raid_addr, num_blocks,
1757 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1758
1759 RF_LOCK_MUTEX(raidPtr->mutex);
1760 }
1761 RF_UNLOCK_MUTEX(raidPtr->mutex);
1762 }
1763
1764
1765
1766
1767 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1768
1769 int
1770 rf_DispatchKernelIO(queue, req)
1771 RF_DiskQueue_t *queue;
1772 RF_DiskQueueData_t *req;
1773 {
1774 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1775 struct buf *bp;
1776 struct raidbuf *raidbp = NULL;
1777 struct raid_softc *rs;
1778 int unit;
1779 int s;
1780
1781 s=0;
1782 /* s = splbio();*/ /* want to test this */
1783 /* XXX along with the vnode, we also need the softc associated with
1784 * this device.. */
1785
1786 req->queue = queue;
1787
1788 unit = queue->raidPtr->raidid;
1789
1790 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1791
1792 if (unit >= numraid) {
1793 printf("Invalid unit number: %d %d\n", unit, numraid);
1794 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1795 }
1796 rs = &raid_softc[unit];
1797
1798 bp = req->bp;
1799 #if 1
1800 /* XXX when there is a physical disk failure, someone is passing us a
1801 * buffer that contains old stuff!! Attempt to deal with this problem
1802 * without taking a performance hit... (not sure where the real bug
1803 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1804
1805 if (bp->b_flags & B_ERROR) {
1806 bp->b_flags &= ~B_ERROR;
1807 }
1808 if (bp->b_error != 0) {
1809 bp->b_error = 0;
1810 }
1811 #endif
1812 raidbp = RAIDGETBUF(rs);
1813
1814 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1815
1816 /*
1817 * context for raidiodone
1818 */
1819 raidbp->rf_obp = bp;
1820 raidbp->req = req;
1821
1822 LIST_INIT(&raidbp->rf_buf.b_dep);
1823
1824 switch (req->type) {
1825 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1826 /* XXX need to do something extra here.. */
1827 /* I'm leaving this in, as I've never actually seen it used,
1828 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
1830 queue->numOutstanding++;
1831
1832 /* XXX need to glue the original buffer into this?? */
1833
1834 KernelWakeupFunc(&raidbp->rf_buf);
1835 break;
1836
1837 case RF_IO_TYPE_READ:
1838 case RF_IO_TYPE_WRITE:
1839
1840 if (req->tracerec) {
1841 RF_ETIMER_START(req->tracerec->timer);
1842 }
1843 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1844 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1845 req->sectorOffset, req->numSector,
1846 req->buf, KernelWakeupFunc, (void *) req,
1847 queue->raidPtr->logBytesPerSector, req->b_proc);
1848
1849 if (rf_debugKernelAccess) {
1850 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1851 (long) bp->b_blkno));
1852 }
1853 queue->numOutstanding++;
1854 queue->last_deq_sector = req->sectorOffset;
1855 /* acc wouldn't have been let in if there were any pending
1856 * reqs at any other priority */
1857 queue->curPriority = req->priority;
1858
1859 db1_printf(("Going for %c to unit %d row %d col %d\n",
1860 req->type, unit, queue->row, queue->col));
1861 db1_printf(("sector %d count %d (%d bytes) %d\n",
1862 (int) req->sectorOffset, (int) req->numSector,
1863 (int) (req->numSector <<
1864 queue->raidPtr->logBytesPerSector),
1865 (int) queue->raidPtr->logBytesPerSector));
1866 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1867 raidbp->rf_buf.b_vp->v_numoutput++;
1868 }
1869 VOP_STRATEGY(&raidbp->rf_buf);
1870
1871 break;
1872
1873 default:
1874 panic("bad req->type in rf_DispatchKernelIO");
1875 }
1876 db1_printf(("Exiting from DispatchKernelIO\n"));
1877 /* splx(s); */ /* want to test this */
1878 return (0);
1879 }
/* This is the callback function associated with an I/O invoked from
 * kernel code.
 */
1883 static void
1884 KernelWakeupFunc(vbp)
1885 struct buf *vbp;
1886 {
1887 RF_DiskQueueData_t *req = NULL;
1888 RF_DiskQueue_t *queue;
1889 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1890 struct buf *bp;
1891 struct raid_softc *rs;
1892 int unit;
1893 int s;
1894
1895 s = splbio();
1896 db1_printf(("recovering the request queue:\n"));
1897 req = raidbp->req;
1898
1899 bp = raidbp->rf_obp;
1900
1901 queue = (RF_DiskQueue_t *) req->queue;
1902
1903 if (raidbp->rf_buf.b_flags & B_ERROR) {
1904 bp->b_flags |= B_ERROR;
1905 bp->b_error = raidbp->rf_buf.b_error ?
1906 raidbp->rf_buf.b_error : EIO;
1907 }
1908
1909 /* XXX methinks this could be wrong... */
1910 #if 1
1911 bp->b_resid = raidbp->rf_buf.b_resid;
1912 #endif
1913
1914 if (req->tracerec) {
1915 RF_ETIMER_STOP(req->tracerec->timer);
1916 RF_ETIMER_EVAL(req->tracerec->timer);
1917 RF_LOCK_MUTEX(rf_tracing_mutex);
1918 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1919 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1920 req->tracerec->num_phys_ios++;
1921 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1922 }
1923 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1924
1925 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1926
1927
1928 /* XXX Ok, let's get aggressive... If B_ERROR is set, mark the
1929 * component as failed... */
1930
1931 if (bp->b_flags & B_ERROR) {
1932 /* Mark the disk as dead */
1933 /* but only mark it once... */
1934 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1935 rf_ds_optimal) {
1936 printf("raid%d: IO Error. Marking %s as failed.\n",
1937 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1938 queue->raidPtr->Disks[queue->row][queue->col].status =
1939 rf_ds_failed;
1940 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1941 queue->raidPtr->numFailures++;
1942 queue->raidPtr->numNewFailures++;
1943 } else { /* Disk is already dead... */
1944 /* printf("Disk already marked as dead!\n"); */
1945 }
1946
1947 }
1948
1949 rs = &raid_softc[unit];
1950 RAIDPUTBUF(rs, raidbp);
1951
1952 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1953 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1954
1955 splx(s);
1956 }
1957
1958
1959
1960 /*
1961 * initialize a buf structure for doing an I/O in the kernel.
1962 */
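/*
 * Note: startSect and numSect are in units of the array's logical
 * sectors; b_bcount and b_bufsize are derived by shifting numSect left
 * by logBytesPerSector.  The callback (cbFunc) is wired up through
 * b_iodone with B_CALL, so biodone() on this buffer re-enters
 * KernelWakeupFunc() above.  cbArg is currently unused here;
 * KernelWakeupFunc() recovers the request from the enclosing
 * struct raidbuf instead.
 */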
1963 static void
1964 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1965 logBytesPerSector, b_proc)
1966 struct buf *bp;
1967 struct vnode *b_vp;
1968 unsigned rw_flag;
1969 dev_t dev;
1970 RF_SectorNum_t startSect;
1971 RF_SectorCount_t numSect;
1972 caddr_t buf;
1973 void (*cbFunc) (struct buf *);
1974 void *cbArg;
1975 int logBytesPerSector;
1976 struct proc *b_proc;
1977 {
1978 /* bp->b_flags = B_PHYS | rw_flag; */
1979 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1980 bp->b_bcount = numSect << logBytesPerSector;
1981 bp->b_bufsize = bp->b_bcount;
1982 bp->b_error = 0;
1983 bp->b_dev = dev;
1984 bp->b_data = buf;
1985 bp->b_blkno = startSect;
1986 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1987 if (bp->b_bcount == 0) {
1988 panic("bp->b_bcount is zero in InitBP!!\n");
1989 }
1990 bp->b_proc = b_proc;
1991 bp->b_iodone = cbFunc;
1992 bp->b_vp = b_vp;
1993
1994 }
1995
1996 static void
1997 raidgetdefaultlabel(raidPtr, rs, lp)
1998 RF_Raid_t *raidPtr;
1999 struct raid_softc *rs;
2000 struct disklabel *lp;
2001 {
2002 db1_printf(("Building a default label...\n"));
2003 memset(lp, 0, sizeof(*lp));
2004
2005 /* fabricate a label... */
2006 lp->d_secperunit = raidPtr->totalSectors;
2007 lp->d_secsize = raidPtr->bytesPerSector;
2008 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2009 lp->d_ntracks = 4 * raidPtr->numCol;
2010 lp->d_ncylinders = raidPtr->totalSectors /
2011 (lp->d_nsectors * lp->d_ntracks);
2012 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2013
2014 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2015 lp->d_type = DTYPE_RAID;
2016 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2017 lp->d_rpm = 3600;
2018 lp->d_interleave = 1;
2019 lp->d_flags = 0;
2020
2021 lp->d_partitions[RAW_PART].p_offset = 0;
2022 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2023 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2024 lp->d_npartitions = RAW_PART + 1;
2025
2026 lp->d_magic = DISKMAGIC;
2027 lp->d_magic2 = DISKMAGIC;
2028 lp->d_checksum = dkcksum(lp);
2029
2030 }
2031 /*
2032 * Read the disklabel from the raid device. If one is not present, fake one
2033 * up.
2034 */
2035 static void
2036 raidgetdisklabel(dev)
2037 dev_t dev;
2038 {
2039 int unit = raidunit(dev);
2040 struct raid_softc *rs = &raid_softc[unit];
2041 char *errstring;
2042 struct disklabel *lp = rs->sc_dkdev.dk_label;
2043 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2044 RF_Raid_t *raidPtr;
2045
2046 db1_printf(("Getting the disklabel...\n"));
2047
2048 memset(clp, 0, sizeof(*clp));
2049
2050 raidPtr = raidPtrs[unit];
2051
2052 raidgetdefaultlabel(raidPtr, rs, lp);
2053
2054 /*
2055 * Call the generic disklabel extraction routine.
2056 */
2057 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2058 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2059 if (errstring)
2060 raidmakedisklabel(rs);
2061 else {
2062 int i;
2063 struct partition *pp;
2064
2065 /*
2066 * Sanity check whether the found disklabel is valid.
2067 *
2068 * This is necessary since the total size of the raid device
2069 * may vary when the interleave is changed, even though exactly
2070 * the same components are used, and an old disklabel may be
2071 * used if one is found.
2072 */
2073 if (lp->d_secperunit != rs->sc_size)
2074 printf("WARNING: %s: "
2075 "total sector size in disklabel (%d) != "
2076 "the size of raid (%ld)\n", rs->sc_xname,
2077 lp->d_secperunit, (long) rs->sc_size);
2078 for (i = 0; i < lp->d_npartitions; i++) {
2079 pp = &lp->d_partitions[i];
2080 if (pp->p_offset + pp->p_size > rs->sc_size)
2081 printf("WARNING: %s: end of partition `%c' "
2082 "exceeds the size of raid (%ld)\n",
2083 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2084 }
2085 }
2086
2087 }
2088 /*
2089 * Take care of things one might want to take care of in the event
2090 * that a disklabel isn't present.
2091 */
2092 static void
2093 raidmakedisklabel(rs)
2094 struct raid_softc *rs;
2095 {
2096 struct disklabel *lp = rs->sc_dkdev.dk_label;
2097 db1_printf(("Making a label..\n"));
2098
2099 /*
2100 * For historical reasons, if there's no disklabel present
2101 * the raw partition must be marked FS_BSDFFS.
2102 */
2103
2104 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2105
2106 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2107
2108 lp->d_checksum = dkcksum(lp);
2109 }
2110 /*
2111 * Lookup the provided name in the filesystem. If the file exists,
2112 * is a valid block device, and isn't being used by anyone else,
2113 * set *vpp to the file's vnode.
2114 * You'll find the original of this in ccd.c
2115 */
2116 int
2117 raidlookup(path, p, vpp)
2118 char *path;
2119 struct proc *p;
2120 struct vnode **vpp; /* result */
2121 {
2122 struct nameidata nd;
2123 struct vnode *vp;
2124 struct vattr va;
2125 int error;
2126
2127 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2128 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2129 #ifdef DEBUG
2130 printf("RAIDframe: vn_open returned %d\n", error);
2131 #endif
2132 return (error);
2133 }
2134 vp = nd.ni_vp;
2135 if (vp->v_usecount > 1) {
2136 VOP_UNLOCK(vp, 0);
2137 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2138 return (EBUSY);
2139 }
2140 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2141 VOP_UNLOCK(vp, 0);
2142 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2143 return (error);
2144 }
2145 /* XXX: eventually we should handle VREG, too. */
2146 if (va.va_type != VBLK) {
2147 VOP_UNLOCK(vp, 0);
2148 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2149 return (ENOTBLK);
2150 }
2151 VOP_UNLOCK(vp, 0);
2152 *vpp = vp;
2153 return (0);
2154 }
2155 /*
2156 * Wait interruptibly for an exclusive lock.
2157 *
2158 * XXX
2159 * Several drivers do this; it should be abstracted and made MP-safe.
2160 * (Hmm... where have we seen this warning before :-> GO )
2161 */
2162 static int
2163 raidlock(rs)
2164 struct raid_softc *rs;
2165 {
2166 int error;
2167
2168 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2169 rs->sc_flags |= RAIDF_WANTED;
2170 if ((error =
2171 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2172 return (error);
2173 }
2174 rs->sc_flags |= RAIDF_LOCKED;
2175 return (0);
2176 }
2177 /*
2178 * Unlock and wake up any waiters.
2179 */
2180 static void
2181 raidunlock(rs)
2182 struct raid_softc *rs;
2183 {
2184
2185 rs->sc_flags &= ~RAIDF_LOCKED;
2186 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2187 rs->sc_flags &= ~RAIDF_WANTED;
2188 wakeup(rs);
2189 }
2190 }
2191
2192
2193 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2194 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2195
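/*
 * The component label is stored RF_COMPONENT_INFO_OFFSET bytes (16 KB)
 * from the start of each component and occupies RF_COMPONENT_INFO_SIZE
 * bytes, presumably inside the region reserved by rf_protected_sectors
 * (see the XXX in raidread_component_label below).  The readers/writers
 * below convert the byte offset into a block number with DEV_BSIZE; with
 * the usual DEV_BSIZE of 512 the label I/O starts at block 32.
 * raidmarkclean()/raidmarkdirty() are read-modify-write helpers that
 * update only the clean flag and mod_counter of an existing label.
 */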
2196 int
2197 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2198 {
2199 RF_ComponentLabel_t clabel;
2200 raidread_component_label(dev, b_vp, &clabel);
2201 clabel.mod_counter = mod_counter;
2202 clabel.clean = RF_RAID_CLEAN;
2203 raidwrite_component_label(dev, b_vp, &clabel);
2204 return(0);
2205 }
2206
2207
2208 int
2209 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2210 {
2211 RF_ComponentLabel_t clabel;
2212 raidread_component_label(dev, b_vp, &clabel);
2213 clabel.mod_counter = mod_counter;
2214 clabel.clean = RF_RAID_DIRTY;
2215 raidwrite_component_label(dev, b_vp, &clabel);
2216 return(0);
2217 }
2218
2219 /* ARGSUSED */
2220 int
2221 raidread_component_label(dev, b_vp, clabel)
2222 dev_t dev;
2223 struct vnode *b_vp;
2224 RF_ComponentLabel_t *clabel;
2225 {
2226 struct buf *bp;
2227 const struct bdevsw *bdev;
2228 int error;
2229
2230 /* XXX should probably ensure that we don't try to do this if
2231 someone has changed rf_protected_sectors. */
2232
2233 if (b_vp == NULL) {
2234 /* For whatever reason, this component is not valid.
2235 Don't try to read a component label from it. */
2236 return(EINVAL);
2237 }
2238
2239 /* get a block of the appropriate size... */
2240 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2241 bp->b_dev = dev;
2242
2243 /* get our ducks in a row for the read */
2244 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2245 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2246 bp->b_flags |= B_READ;
2247 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2248
2249 bdev = bdevsw_lookup(bp->b_dev);
2250 if (bdev == NULL)
2251 return (ENXIO);
2252 (*bdev->d_strategy)(bp);
2253
2254 error = biowait(bp);
2255
2256 if (!error) {
2257 memcpy(clabel, bp->b_data,
2258 sizeof(RF_ComponentLabel_t));
2259 #if 0
2260 rf_print_component_label( clabel );
2261 #endif
2262 } else {
2263 #if 0
2264 printf("Failed to read RAID component label!\n");
2265 #endif
2266 }
2267
2268 brelse(bp);
2269 return(error);
2270 }
2271 /* ARGSUSED */
2272 int
2273 raidwrite_component_label(dev, b_vp, clabel)
2274 dev_t dev;
2275 struct vnode *b_vp;
2276 RF_ComponentLabel_t *clabel;
2277 {
2278 struct buf *bp;
2279 const struct bdevsw *bdev;
2280 int error;
2281
2282 /* get a block of the appropriate size... */
2283 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2284 bp->b_dev = dev;
2285
2286 /* get our ducks in a row for the write */
2287 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2288 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2289 bp->b_flags |= B_WRITE;
2290 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2291
2292 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2293
2294 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2295
2296 bdev = bdevsw_lookup(bp->b_dev);
2297 if (bdev == NULL)
2298 return (ENXIO);
2299 (*bdev->d_strategy)(bp);
2300 error = biowait(bp);
2301 brelse(bp);
2302 if (error) {
2303 #if 1
2304 printf("Failed to write RAID component info!\n");
2305 #endif
2306 }
2307
2308 return(error);
2309 }
2310
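/*
 * rf_markalldirty: bump the array's mod_counter and mark the component
 * label of every non-failed component dirty.  Dead components are not
 * touched at all, and spared components are skipped (see the XXX
 * below).  The clean flag is only set again on a final component
 * update with good parity (rf_update_component_labels()) or after a
 * successful parity rewrite (rf_RewriteParityThread()).
 */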
2311 void
2312 rf_markalldirty(raidPtr)
2313 RF_Raid_t *raidPtr;
2314 {
2315 RF_ComponentLabel_t clabel;
2316 int r,c;
2317
2318 raidPtr->mod_counter++;
2319 for (r = 0; r < raidPtr->numRow; r++) {
2320 for (c = 0; c < raidPtr->numCol; c++) {
2321 /* we don't want to touch (at all) a disk that has
2322 failed */
2323 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2324 raidread_component_label(
2325 raidPtr->Disks[r][c].dev,
2326 raidPtr->raid_cinfo[r][c].ci_vp,
2327 &clabel);
2328 if (clabel.status == rf_ds_spared) {
2329 /* XXX do something special...
2330 but whatever you do, don't
2331 try to access it!! */
2332 } else {
2333 #if 0
2334 clabel.status =
2335 raidPtr->Disks[r][c].status;
2336 raidwrite_component_label(
2337 raidPtr->Disks[r][c].dev,
2338 raidPtr->raid_cinfo[r][c].ci_vp,
2339 &clabel);
2340 #endif
2341 raidmarkdirty(
2342 raidPtr->Disks[r][c].dev,
2343 raidPtr->raid_cinfo[r][c].ci_vp,
2344 raidPtr->mod_counter);
2345 }
2346 }
2347 }
2348 }
2349 /* printf("Component labels marked dirty.\n"); */
2350 #if 0
2351 for( c = 0; c < raidPtr->numSpare ; c++) {
2352 sparecol = raidPtr->numCol + c;
2353 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2354 /*
2355
2356 XXX this is where we get fancy and map this spare
2357 into its correct spot in the array.
2358
2359 */
2360 /*
2361
2362 we claim this disk is "optimal" if it's
2363 rf_ds_used_spare, as that means it should be
2364 directly substitutable for the disk it replaced.
2365 We note that too...
2366
2367 */
2368
2369 for(i=0;i<raidPtr->numRow;i++) {
2370 for(j=0;j<raidPtr->numCol;j++) {
2371 if ((raidPtr->Disks[i][j].spareRow ==
2372 r) &&
2373 (raidPtr->Disks[i][j].spareCol ==
2374 sparecol)) {
2375 srow = r;
2376 scol = sparecol;
2377 break;
2378 }
2379 }
2380 }
2381
2382 raidread_component_label(
2383 raidPtr->Disks[r][sparecol].dev,
2384 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2385 &clabel);
2386 /* make sure status is noted */
2387 clabel.version = RF_COMPONENT_LABEL_VERSION;
2388 clabel.mod_counter = raidPtr->mod_counter;
2389 clabel.serial_number = raidPtr->serial_number;
2390 clabel.row = srow;
2391 clabel.column = scol;
2392 clabel.num_rows = raidPtr->numRow;
2393 clabel.num_columns = raidPtr->numCol;
2394 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2395 clabel.status = rf_ds_optimal;
2396 raidwrite_component_label(
2397 raidPtr->Disks[r][sparecol].dev,
2398 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2399 &clabel);
2400 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2401 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2402 }
2403 }
2404
2405 #endif
2406 }
2407
2408
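/*
 * rf_update_component_labels: re-read, update, and rewrite the label of
 * every optimal component with a freshly bumped mod_counter, and do the
 * same for any used spares (writing a fresh label at the spare's mapped
 * row/column).  If this is the final update and parity is known good,
 * the components are also marked clean.
 */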
2409 void
2410 rf_update_component_labels(raidPtr, final)
2411 RF_Raid_t *raidPtr;
2412 int final;
2413 {
2414 RF_ComponentLabel_t clabel;
2415 int sparecol;
2416 int r,c;
2417 int i,j;
2418 int srow, scol;
2419
2420 srow = -1;
2421 scol = -1;
2422
2423 /* XXX should do extra checks to make sure things really are clean,
2424 rather than blindly setting the clean bit... */
2425
2426 raidPtr->mod_counter++;
2427
2428 for (r = 0; r < raidPtr->numRow; r++) {
2429 for (c = 0; c < raidPtr->numCol; c++) {
2430 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2431 raidread_component_label(
2432 raidPtr->Disks[r][c].dev,
2433 raidPtr->raid_cinfo[r][c].ci_vp,
2434 &clabel);
2435 /* make sure status is noted */
2436 clabel.status = rf_ds_optimal;
2437 /* bump the counter */
2438 clabel.mod_counter = raidPtr->mod_counter;
2439
2440 raidwrite_component_label(
2441 raidPtr->Disks[r][c].dev,
2442 raidPtr->raid_cinfo[r][c].ci_vp,
2443 &clabel);
2444 if (final == RF_FINAL_COMPONENT_UPDATE) {
2445 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2446 raidmarkclean(
2447 raidPtr->Disks[r][c].dev,
2448 raidPtr->raid_cinfo[r][c].ci_vp,
2449 raidPtr->mod_counter);
2450 }
2451 }
2452 }
2453 /* else we don't touch it.. */
2454 }
2455 }
2456
2457 for( c = 0; c < raidPtr->numSpare ; c++) {
2458 sparecol = raidPtr->numCol + c;
2459 /* Need to ensure that the reconstruct actually completed! */
2460 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2461 /*
2462
2463 we claim this disk is "optimal" if it's
2464 rf_ds_used_spare, as that means it should be
2465 directly substitutable for the disk it replaced.
2466 We note that too...
2467
2468 */
2469
2470 for(i=0;i<raidPtr->numRow;i++) {
2471 for(j=0;j<raidPtr->numCol;j++) {
2472 if ((raidPtr->Disks[i][j].spareRow ==
2473 0) &&
2474 (raidPtr->Disks[i][j].spareCol ==
2475 sparecol)) {
2476 srow = i;
2477 scol = j;
2478 break;
2479 }
2480 }
2481 }
2482
2483 /* XXX shouldn't *really* need this... */
2484 raidread_component_label(
2485 raidPtr->Disks[0][sparecol].dev,
2486 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2487 &clabel);
2488 /* make sure status is noted */
2489
2490 raid_init_component_label(raidPtr, &clabel);
2491
2492 clabel.mod_counter = raidPtr->mod_counter;
2493 clabel.row = srow;
2494 clabel.column = scol;
2495 clabel.status = rf_ds_optimal;
2496
2497 raidwrite_component_label(
2498 raidPtr->Disks[0][sparecol].dev,
2499 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2500 &clabel);
2501 if (final == RF_FINAL_COMPONENT_UPDATE) {
2502 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2503 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2504 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2505 raidPtr->mod_counter);
2506 }
2507 }
2508 }
2509 }
2510 /* printf("Component labels updated\n"); */
2511 }
2512
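/*
 * rf_close_component: release a component vnode.  Auto-configured
 * components were opened with VOP_OPEN() in rf_find_raid_components()
 * and are closed with VOP_CLOSE()/vput(); manually configured ones
 * presumably came from raidlookup()'s vn_open() and are closed with
 * vn_close() using the engine thread's credentials.
 */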
2513 void
2514 rf_close_component(raidPtr, vp, auto_configured)
2515 RF_Raid_t *raidPtr;
2516 struct vnode *vp;
2517 int auto_configured;
2518 {
2519 struct proc *p;
2520
2521 p = raidPtr->engine_thread;
2522
2523 if (vp != NULL) {
2524 if (auto_configured == 1) {
2525 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2526 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2527 vput(vp);
2528
2529 } else {
2530 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2531 }
2532 } else {
2533 printf("vnode was NULL\n");
2534 }
2535 }
2536
2537
2538 void
2539 rf_UnconfigureVnodes(raidPtr)
2540 RF_Raid_t *raidPtr;
2541 {
2542 int r,c;
2543 struct proc *p;
2544 struct vnode *vp;
2545 int acd;
2546
2547
2548 /* We take this opportunity to close the vnodes like we should.. */
2549
2550 p = raidPtr->engine_thread;
2551
2552 for (r = 0; r < raidPtr->numRow; r++) {
2553 for (c = 0; c < raidPtr->numCol; c++) {
2554 printf("Closing vnode for row: %d col: %d\n", r, c);
2555 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2556 acd = raidPtr->Disks[r][c].auto_configured;
2557 rf_close_component(raidPtr, vp, acd);
2558 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2559 raidPtr->Disks[r][c].auto_configured = 0;
2560 }
2561 }
2562 for (r = 0; r < raidPtr->numSpare; r++) {
2563 printf("Closing vnode for spare: %d\n", r);
2564 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2565 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2566 rf_close_component(raidPtr, vp, acd);
2567 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2568 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2569 }
2570 }
2571
2572
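/*
 * The thread bodies below (rf_ReconThread, rf_RewriteParityThread,
 * rf_CopybackThread, rf_ReconstructInPlaceThread) all follow the same
 * pattern: set the relevant *_in_progress flag, do the work at
 * splbio(), clear the flag, and terminate with kthread_exit().
 */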
2573 void
2574 rf_ReconThread(req)
2575 struct rf_recon_req *req;
2576 {
2577 int s;
2578 RF_Raid_t *raidPtr;
2579
2580 s = splbio();
2581 raidPtr = (RF_Raid_t *) req->raidPtr;
2582 raidPtr->recon_in_progress = 1;
2583
2584 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2585 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2586
2587 /* XXX get rid of this! we don't need it at all.. */
2588 RF_Free(req, sizeof(*req));
2589
2590 raidPtr->recon_in_progress = 0;
2591 splx(s);
2592
2593 /* That's all... */
2594 kthread_exit(0); /* does not return */
2595 }
2596
2597 void
2598 rf_RewriteParityThread(raidPtr)
2599 RF_Raid_t *raidPtr;
2600 {
2601 int retcode;
2602 int s;
2603
2604 raidPtr->parity_rewrite_in_progress = 1;
2605 s = splbio();
2606 retcode = rf_RewriteParity(raidPtr);
2607 splx(s);
2608 if (retcode) {
2609 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2610 } else {
2611 /* set the clean bit! If we shutdown correctly,
2612 the clean bit on each component label will get
2613 set */
2614 raidPtr->parity_good = RF_RAID_CLEAN;
2615 }
2616 raidPtr->parity_rewrite_in_progress = 0;
2617
2618 /* Anyone waiting for us to stop? If so, inform them... */
2619 if (raidPtr->waitShutdown) {
2620 wakeup(&raidPtr->parity_rewrite_in_progress);
2621 }
2622
2623 /* That's all... */
2624 kthread_exit(0); /* does not return */
2625 }
2626
2627
2628 void
2629 rf_CopybackThread(raidPtr)
2630 RF_Raid_t *raidPtr;
2631 {
2632 int s;
2633
2634 raidPtr->copyback_in_progress = 1;
2635 s = splbio();
2636 rf_CopybackReconstructedData(raidPtr);
2637 splx(s);
2638 raidPtr->copyback_in_progress = 0;
2639
2640 /* That's all... */
2641 kthread_exit(0); /* does not return */
2642 }
2643
2644
2645 void
2646 rf_ReconstructInPlaceThread(req)
2647 struct rf_recon_req *req;
2648 {
2649 int retcode;
2650 int s;
2651 RF_Raid_t *raidPtr;
2652
2653 s = splbio();
2654 raidPtr = req->raidPtr;
2655 raidPtr->recon_in_progress = 1;
2656 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2657 RF_Free(req, sizeof(*req));
2658 raidPtr->recon_in_progress = 0;
2659 splx(s);
2660
2661 /* That's all... */
2662 kthread_exit(0); /* does not return */
2663 }
2664
2665 void
2666 rf_mountroot_hook(dev)
2667 struct device *dev;
2668 {
2669
2670 }
2671
2672
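/*
 * rf_find_raid_components: walk every device on the system looking for
 * RAID components.  Non-disks and the floppy drivers ("fd", "hdfd") are
 * skipped; for each remaining disk the raw partition is opened just
 * long enough to fetch the disklabel, and then every partition of type
 * FS_RAID is opened and its component label read.  Labels that pass
 * rf_reasonable_label() (and whose recorded size fits the partition)
 * are collected into an RF_AutoConfig_t list; the vnodes of accepted
 * components are left open for the later configuration steps.
 */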
2673 RF_AutoConfig_t *
2674 rf_find_raid_components()
2675 {
2676 struct vnode *vp;
2677 struct disklabel label;
2678 struct device *dv;
2679 dev_t dev;
2680 int bmajor;
2681 int error;
2682 int i;
2683 int good_one;
2684 RF_ComponentLabel_t *clabel;
2685 RF_AutoConfig_t *ac_list;
2686 RF_AutoConfig_t *ac;
2687
2688
2689 /* initialize the AutoConfig list */
2690 ac_list = NULL;
2691
2692 /* we begin by trolling through *all* the devices on the system */
2693
2694 for (dv = alldevs.tqh_first; dv != NULL;
2695 dv = dv->dv_list.tqe_next) {
2696
2697 /* we are only interested in disks... */
2698 if (dv->dv_class != DV_DISK)
2699 continue;
2700
2701 /* we don't care about floppies... */
2702 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2703 continue;
2704 }
2705 /* hdfd is the Atari/Hades floppy driver */
2706 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2707 continue;
2708 }
2709
2710 /* need to find the device_name_to_block_device_major stuff */
2711 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2712
2713 /* get a vnode for the raw partition of this disk */
2714
2715 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2716 if (bdevvp(dev, &vp))
2717 panic("RAID can't alloc vnode");
2718
2719 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2720
2721 if (error) {
2722 /* "Who cares." Continue looking
2723 for something that exists */
2724 vput(vp);
2725 continue;
2726 }
2727
2728 /* Ok, the disk exists. Go get the disklabel. */
2729 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2730 FREAD, NOCRED, 0);
2731 if (error) {
2732 /*
2733 * XXX can't happen - open() would
2734 * have errored out (or faked up one)
2735 */
2736 printf("can't get label for dev %s%c (%d)!?!?\n",
2737 dv->dv_xname, 'a' + RAW_PART, error);
2738 }
2739
2740 /* don't need this any more. We'll allocate it again
2741 a little later if we really do... */
2742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2743 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2744 vput(vp);
2745
2746 for (i=0; i < label.d_npartitions; i++) {
2747 /* We only support partitions marked as RAID */
2748 if (label.d_partitions[i].p_fstype != FS_RAID)
2749 continue;
2750
2751 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2752 if (bdevvp(dev, &vp))
2753 panic("RAID can't alloc vnode");
2754
2755 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2756 if (error) {
2757 /* Whatever... */
2758 vput(vp);
2759 continue;
2760 }
2761
2762 good_one = 0;
2763
2764 clabel = (RF_ComponentLabel_t *)
2765 malloc(sizeof(RF_ComponentLabel_t),
2766 M_RAIDFRAME, M_NOWAIT);
2767 if (clabel == NULL) {
2768 /* XXX CLEANUP HERE */
2769 printf("RAID auto config: out of memory!\n");
2770 return(NULL); /* XXX probably should panic? */
2771 }
2772
2773 if (!raidread_component_label(dev, vp, clabel)) {
2774 /* Got the label. Does it look reasonable? */
2775 if (rf_reasonable_label(clabel) &&
2776 (clabel->partitionSize <=
2777 label.d_partitions[i].p_size)) {
2778 #if DEBUG
2779 printf("Component on: %s%c: %d\n",
2780 dv->dv_xname, 'a'+i,
2781 label.d_partitions[i].p_size);
2782 rf_print_component_label(clabel);
2783 #endif
2784 /* if it's reasonable, add it,
2785 else ignore it. */
2786 ac = (RF_AutoConfig_t *)
2787 malloc(sizeof(RF_AutoConfig_t),
2788 M_RAIDFRAME,
2789 M_NOWAIT);
2790 if (ac == NULL) {
2791 /* XXX should panic?? */
2792 return(NULL);
2793 }
2794
2795 sprintf(ac->devname, "%s%c",
2796 dv->dv_xname, 'a'+i);
2797 ac->dev = dev;
2798 ac->vp = vp;
2799 ac->clabel = clabel;
2800 ac->next = ac_list;
2801 ac_list = ac;
2802 good_one = 1;
2803 }
2804 }
2805 if (!good_one) {
2806 /* cleanup */
2807 free(clabel, M_RAIDFRAME);
2808 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2809 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2810 vput(vp);
2811 }
2812 }
2813 }
2814 return(ac_list);
2815 }
2816
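/*
 * rf_reasonable_label: basic sanity checks on a label read from disk --
 * known version, clean flag either RF_RAID_CLEAN or RF_RAID_DIRTY,
 * row/column within the claimed geometry, and non-zero block size and
 * block count.
 */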
2817 static int
2818 rf_reasonable_label(clabel)
2819 RF_ComponentLabel_t *clabel;
2820 {
2821
2822 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2823 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2824 ((clabel->clean == RF_RAID_CLEAN) ||
2825 (clabel->clean == RF_RAID_DIRTY)) &&
2826 clabel->row >=0 &&
2827 clabel->column >= 0 &&
2828 clabel->num_rows > 0 &&
2829 clabel->num_columns > 0 &&
2830 clabel->row < clabel->num_rows &&
2831 clabel->column < clabel->num_columns &&
2832 clabel->blockSize > 0 &&
2833 clabel->numBlocks > 0) {
2834 /* label looks reasonable enough... */
2835 return(1);
2836 }
2837 return(0);
2838 }
2839
2840
2841 void
2842 rf_print_component_label(clabel)
2843 RF_ComponentLabel_t *clabel;
2844 {
2845 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2846 clabel->row, clabel->column,
2847 clabel->num_rows, clabel->num_columns);
2848 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2849 clabel->version, clabel->serial_number,
2850 clabel->mod_counter);
2851 printf(" Clean: %s Status: %d\n",
2852 clabel->clean ? "Yes" : "No", clabel->status );
2853 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2854 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2855 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2856 (char) clabel->parityConfig, clabel->blockSize,
2857 clabel->numBlocks);
2858 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2859 printf(" Contains root partition: %s\n",
2860 clabel->root_partition ? "Yes" : "No" );
2861 printf(" Last configured as: raid%d\n", clabel->last_unit );
2862 #if 0
2863 printf(" Config order: %d\n", clabel->config_order);
2864 #endif
2865
2866 }
2867
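/*
 * rf_create_auto_sets: partition the flat AutoConfig list into
 * configuration sets.  Each component is appended to the first existing
 * set it fits (as judged by rf_does_it_fit()); if none matches, a new
 * set is started.  Every set is a candidate RAID array.
 */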
2868 RF_ConfigSet_t *
2869 rf_create_auto_sets(ac_list)
2870 RF_AutoConfig_t *ac_list;
2871 {
2872 RF_AutoConfig_t *ac;
2873 RF_ConfigSet_t *config_sets;
2874 RF_ConfigSet_t *cset;
2875 RF_AutoConfig_t *ac_next;
2876
2877
2878 config_sets = NULL;
2879
2880 /* Go through the AutoConfig list, and figure out which components
2881 belong to what sets. */
2882 ac = ac_list;
2883 while(ac!=NULL) {
2884 /* we're going to putz with ac->next, so save it here
2885 for use at the end of the loop */
2886 ac_next = ac->next;
2887
2888 if (config_sets == NULL) {
2889 /* will need at least this one... */
2890 config_sets = (RF_ConfigSet_t *)
2891 malloc(sizeof(RF_ConfigSet_t),
2892 M_RAIDFRAME, M_NOWAIT);
2893 if (config_sets == NULL) {
2894 panic("rf_create_auto_sets: No memory!\n");
2895 }
2896 /* this one is easy :) */
2897 config_sets->ac = ac;
2898 config_sets->next = NULL;
2899 config_sets->rootable = 0;
2900 ac->next = NULL;
2901 } else {
2902 /* which set does this component fit into? */
2903 cset = config_sets;
2904 while(cset!=NULL) {
2905 if (rf_does_it_fit(cset, ac)) {
2906 /* looks like it matches... */
2907 ac->next = cset->ac;
2908 cset->ac = ac;
2909 break;
2910 }
2911 cset = cset->next;
2912 }
2913 if (cset==NULL) {
2914 /* didn't find a match above... new set..*/
2915 cset = (RF_ConfigSet_t *)
2916 malloc(sizeof(RF_ConfigSet_t),
2917 M_RAIDFRAME, M_NOWAIT);
2918 if (cset == NULL) {
2919 panic("rf_create_auto_sets: No memory!\n");
2920 }
2921 cset->ac = ac;
2922 ac->next = NULL;
2923 cset->next = config_sets;
2924 cset->rootable = 0;
2925 config_sets = cset;
2926 }
2927 }
2928 ac = ac_next;
2929 }
2930
2931
2932 return(config_sets);
2933 }
2934
2935 static int
2936 rf_does_it_fit(cset, ac)
2937 RF_ConfigSet_t *cset;
2938 RF_AutoConfig_t *ac;
2939 {
2940 RF_ComponentLabel_t *clabel1, *clabel2;
2941
2942 /* If this one matches the *first* one in the set, that's good
2943 enough, since the other members of the set would have been
2944 through here too... */
2945 /* note that we are not checking partitionSize here..
2946
2947 Note that we are also not checking the mod_counters here.
2948 If everything else matches except the mod_counter, that's
2949 good enough for this test. We will deal with the mod_counters
2950 a little later in the autoconfiguration process.
2951
2952 (clabel1->mod_counter == clabel2->mod_counter) &&
2953
2954 The reason we don't check for this is that failed disks
2955 will have lower modification counts. If those disks are
2956 not added to the set they used to belong to, then they will
2957 form their own set, which may result in 2 different sets,
2958 for example, competing to be configured at raid0, and
2959 perhaps competing to be the root filesystem set. If the
2960 wrong ones get configured, or both attempt to become /,
2961 weird behaviour and/or serious lossage will occur. Thus we
2962 need to bring them into the fold here, and kick them out at
2963 a later point.
2964
2965 */
2966
2967 clabel1 = cset->ac->clabel;
2968 clabel2 = ac->clabel;
2969 if ((clabel1->version == clabel2->version) &&
2970 (clabel1->serial_number == clabel2->serial_number) &&
2971 (clabel1->num_rows == clabel2->num_rows) &&
2972 (clabel1->num_columns == clabel2->num_columns) &&
2973 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2974 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2975 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2976 (clabel1->parityConfig == clabel2->parityConfig) &&
2977 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2978 (clabel1->blockSize == clabel2->blockSize) &&
2979 (clabel1->numBlocks == clabel2->numBlocks) &&
2980 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2981 (clabel1->root_partition == clabel2->root_partition) &&
2982 (clabel1->last_unit == clabel2->last_unit) &&
2983 (clabel1->config_order == clabel2->config_order)) {
2984 /* if it gets here, it almost *has* to be a match */
2985 } else {
2986 /* it's not consistent with something in the set..
2987 punt */
2988 return(0);
2989 }
2990 /* all was fine.. it must fit... */
2991 return(1);
2992 }
2993
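/*
 * rf_have_enough_components: decide whether a configuration set has
 * enough live members to be configured.  The authoritative mod_counter
 * is the largest one seen in the set; components carrying an older
 * counter are treated as missing.  RAID 1 ('1') only fails if both
 * halves of a mirror pair are missing; RAID 0 ('0') tolerates no
 * missing component, and RAID 4/5 ('4'/'5') tolerate at most one.
 */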
2994 int
2995 rf_have_enough_components(cset)
2996 RF_ConfigSet_t *cset;
2997 {
2998 RF_AutoConfig_t *ac;
2999 RF_AutoConfig_t *auto_config;
3000 RF_ComponentLabel_t *clabel;
3001 int r,c;
3002 int num_rows;
3003 int num_cols;
3004 int num_missing;
3005 int mod_counter;
3006 int mod_counter_found;
3007 int even_pair_failed;
3008 char parity_type;
3009
3010
3011 /* check to see that we have enough 'live' components
3012 of this set. If so, we can configure it if necessary */
3013
3014 num_rows = cset->ac->clabel->num_rows;
3015 num_cols = cset->ac->clabel->num_columns;
3016 parity_type = cset->ac->clabel->parityConfig;
3017
3018 /* XXX Check for duplicate components!?!?!? */
3019
3020 /* Determine what the mod_counter is supposed to be for this set. */
3021
3022 mod_counter_found = 0;
3023 mod_counter = 0;
3024 ac = cset->ac;
3025 while(ac!=NULL) {
3026 if (mod_counter_found==0) {
3027 mod_counter = ac->clabel->mod_counter;
3028 mod_counter_found = 1;
3029 } else {
3030 if (ac->clabel->mod_counter > mod_counter) {
3031 mod_counter = ac->clabel->mod_counter;
3032 }
3033 }
3034 ac = ac->next;
3035 }
3036
3037 num_missing = 0;
3038 auto_config = cset->ac;
3039
3040 for(r=0; r<num_rows; r++) {
3041 even_pair_failed = 0;
3042 for(c=0; c<num_cols; c++) {
3043 ac = auto_config;
3044 while(ac!=NULL) {
3045 if ((ac->clabel->row == r) &&
3046 (ac->clabel->column == c) &&
3047 (ac->clabel->mod_counter == mod_counter)) {
3048 /* it's this one... */
3049 #if DEBUG
3050 printf("Found: %s at %d,%d\n",
3051 ac->devname,r,c);
3052 #endif
3053 break;
3054 }
3055 ac=ac->next;
3056 }
3057 if (ac==NULL) {
3058 /* Didn't find one here! */
3059 /* special case for RAID 1, especially
3060 where there are more than 2
3061 components (where RAIDframe treats
3062 things a little differently :( ) */
3063 if (parity_type == '1') {
3064 if (c%2 == 0) { /* even component */
3065 even_pair_failed = 1;
3066 } else { /* odd component. If
3067 this one has failed,
3068 and so has the even
3069 component, it's
3070 "Good Night, Charlie" */
3071 if (even_pair_failed == 1) {
3072 return(0);
3073 }
3074 }
3075 } else {
3076 /* normal accounting */
3077 num_missing++;
3078 }
3079 }
3080 if ((parity_type == '1') && (c%2 == 1)) {
3081 /* Just finished the odd half of a mirror
3082 pair without bailing; reset the
3083 even_pair_failed flag and go on.... */
3084 even_pair_failed = 0;
3085 }
3086 }
3087 }
3088
3089 clabel = cset->ac->clabel;
3090
3091 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3092 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3093 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3094 /* XXX this needs to be made *much* more general */
3095 /* Too many failures */
3096 return(0);
3097 }
3098 /* otherwise, all is well, and we've got enough to take a kick
3099 at autoconfiguring this set */
3100 return(1);
3101 }
3102
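/*
 * rf_create_configuration: build an RF_Config_t from the component
 * labels.  The geometry and layout parameters come from the first
 * component's label; each component's device name is then dropped into
 * config->devnames at the (row, column) recorded in its own label.  The
 * disk queue type is currently hard-wired to "fifo".
 */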
3103 void
3104 rf_create_configuration(ac,config,raidPtr)
3105 RF_AutoConfig_t *ac;
3106 RF_Config_t *config;
3107 RF_Raid_t *raidPtr;
3108 {
3109 RF_ComponentLabel_t *clabel;
3110 int i;
3111
3112 clabel = ac->clabel;
3113
3114 /* 1. Fill in the common stuff */
3115 config->numRow = clabel->num_rows;
3116 config->numCol = clabel->num_columns;
3117 config->numSpare = 0; /* XXX should this be set here? */
3118 config->sectPerSU = clabel->sectPerSU;
3119 config->SUsPerPU = clabel->SUsPerPU;
3120 config->SUsPerRU = clabel->SUsPerRU;
3121 config->parityConfig = clabel->parityConfig;
3122 /* XXX... */
3123 strcpy(config->diskQueueType,"fifo");
3124 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3125 config->layoutSpecificSize = 0; /* XXX ?? */
3126
3127 while(ac!=NULL) {
3128 /* row/col values will be in range due to the checks
3129 in reasonable_label() */
3130 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3131 ac->devname);
3132 ac = ac->next;
3133 }
3134
3135 for(i=0;i<RF_MAXDBGV;i++) {
3136 config->debugVars[i][0] = '\0';
3137 }
3138 }
3139
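/*
 * rf_set_autoconfig / rf_set_rootpartition: push a new flag value into
 * the in-core raidPtr and into the on-disk label of every optimal
 * component (read-modify-write), returning the value that was set.
 */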
3140 int
3141 rf_set_autoconfig(raidPtr, new_value)
3142 RF_Raid_t *raidPtr;
3143 int new_value;
3144 {
3145 RF_ComponentLabel_t clabel;
3146 struct vnode *vp;
3147 dev_t dev;
3148 int row, column;
3149
3150 raidPtr->autoconfigure = new_value;
3151 for(row=0; row<raidPtr->numRow; row++) {
3152 for(column=0; column<raidPtr->numCol; column++) {
3153 if (raidPtr->Disks[row][column].status ==
3154 rf_ds_optimal) {
3155 dev = raidPtr->Disks[row][column].dev;
3156 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3157 raidread_component_label(dev, vp, &clabel);
3158 clabel.autoconfigure = new_value;
3159 raidwrite_component_label(dev, vp, &clabel);
3160 }
3161 }
3162 }
3163 return(new_value);
3164 }
3165
3166 int
3167 rf_set_rootpartition(raidPtr, new_value)
3168 RF_Raid_t *raidPtr;
3169 int new_value;
3170 {
3171 RF_ComponentLabel_t clabel;
3172 struct vnode *vp;
3173 dev_t dev;
3174 int row, column;
3175
3176 raidPtr->root_partition = new_value;
3177 for(row=0; row<raidPtr->numRow; row++) {
3178 for(column=0; column<raidPtr->numCol; column++) {
3179 if (raidPtr->Disks[row][column].status ==
3180 rf_ds_optimal) {
3181 dev = raidPtr->Disks[row][column].dev;
3182 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3183 raidread_component_label(dev, vp, &clabel);
3184 clabel.root_partition = new_value;
3185 raidwrite_component_label(dev, vp, &clabel);
3186 }
3187 }
3188 }
3189 return(new_value);
3190 }
3191
3192 void
3193 rf_release_all_vps(cset)
3194 RF_ConfigSet_t *cset;
3195 {
3196 RF_AutoConfig_t *ac;
3197
3198 ac = cset->ac;
3199 while(ac!=NULL) {
3200 /* Close the vp, and give it back */
3201 if (ac->vp) {
3202 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3203 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3204 vput(ac->vp);
3205 ac->vp = NULL;
3206 }
3207 ac = ac->next;
3208 }
3209 }
3210
3211
3212 void
3213 rf_cleanup_config_set(cset)
3214 RF_ConfigSet_t *cset;
3215 {
3216 RF_AutoConfig_t *ac;
3217 RF_AutoConfig_t *next_ac;
3218
3219 ac = cset->ac;
3220 while(ac!=NULL) {
3221 next_ac = ac->next;
3222 /* nuke the label */
3223 free(ac->clabel, M_RAIDFRAME);
3224 /* cleanup the config structure */
3225 free(ac, M_RAIDFRAME);
3226 /* "next.." */
3227 ac = next_ac;
3228 }
3229 /* and, finally, nuke the config set */
3230 free(cset, M_RAIDFRAME);
3231 }
3232
3233
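/*
 * raid_init_component_label: seed a component label from the live
 * RF_Raid_t.  Note that it always writes RF_RAID_DIRTY and
 * rf_ds_optimal; callers such as rf_update_component_labels() adjust
 * fields like row, column, and mod_counter afterwards.
 */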
3234 void
3235 raid_init_component_label(raidPtr, clabel)
3236 RF_Raid_t *raidPtr;
3237 RF_ComponentLabel_t *clabel;
3238 {
3239 /* current version number */
3240 clabel->version = RF_COMPONENT_LABEL_VERSION;
3241 clabel->serial_number = raidPtr->serial_number;
3242 clabel->mod_counter = raidPtr->mod_counter;
3243 clabel->num_rows = raidPtr->numRow;
3244 clabel->num_columns = raidPtr->numCol;
3245 clabel->clean = RF_RAID_DIRTY; /* not clean */
3246 clabel->status = rf_ds_optimal; /* "It's good!" */
3247
3248 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3249 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3250 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3251
3252 clabel->blockSize = raidPtr->bytesPerSector;
3253 clabel->numBlocks = raidPtr->sectorsPerDisk;
3254
3255 /* XXX not portable */
3256 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3257 clabel->maxOutstanding = raidPtr->maxOutstanding;
3258 clabel->autoconfigure = raidPtr->autoconfigure;
3259 clabel->root_partition = raidPtr->root_partition;
3260 clabel->last_unit = raidPtr->raidid;
3261 clabel->config_order = raidPtr->config_order;
3262 }
3263
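/*
 * rf_auto_config_set: configure one autodetected set.  A unit number is
 * chosen by first trying the label's last_unit and otherwise scanning
 * down from the highest unit for a free one; the configuration is then
 * built with rf_create_configuration() and brought up via
 * rf_Configure()/raidinit().  On success all components are marked
 * dirty, and the set is flagged rootable if its label asks for a root
 * partition.
 */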
3264 int
3265 rf_auto_config_set(cset,unit)
3266 RF_ConfigSet_t *cset;
3267 int *unit;
3268 {
3269 RF_Raid_t *raidPtr;
3270 RF_Config_t *config;
3271 int raidID;
3272 int retcode;
3273
3274 printf("RAID autoconfigure\n");
3275
3276 retcode = 0;
3277 *unit = -1;
3278
3279 /* 1. Create a config structure */
3280
3281 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3282 M_RAIDFRAME,
3283 M_NOWAIT);
3284 if (config==NULL) {
3285 printf("Out of mem!?!?\n");
3286 /* XXX do something more intelligent here. */
3287 return(1);
3288 }
3289
3290 memset(config, 0, sizeof(RF_Config_t));
3291
3292 /* XXX raidID needs to be set correctly.. */
3293
3294 /*
3295 2. Figure out what RAID ID this one is supposed to live at
3296 See if we can get the same RAID dev that it was configured
3297 on last time..
3298 */
3299
3300 raidID = cset->ac->clabel->last_unit;
3301 if ((raidID < 0) || (raidID >= numraid)) {
3302 /* let's not wander off into lala land. */
3303 raidID = numraid - 1;
3304 }
3305 if (raidPtrs[raidID]->valid != 0) {
3306
3307 /*
3308 Nope... Go looking for an alternative...
3309 Start high so we don't immediately use raid0 if that's
3310 not taken.
3311 */
3312
3313 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3314 if (raidPtrs[raidID]->valid == 0) {
3315 /* can use this one! */
3316 break;
3317 }
3318 }
3319 }
3320
3321 if (raidID < 0) {
3322 /* punt... */
3323 printf("Unable to auto configure this set!\n");
3324 printf("(Out of RAID devs!)\n");
3325 return(1);
3326 }
3327 printf("Configuring raid%d:\n",raidID);
3328 raidPtr = raidPtrs[raidID];
3329
3330 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3331 raidPtr->raidid = raidID;
3332 raidPtr->openings = RAIDOUTSTANDING;
3333
3334 /* 3. Build the configuration structure */
3335 rf_create_configuration(cset->ac, config, raidPtr);
3336
3337 /* 4. Do the configuration */
3338 retcode = rf_Configure(raidPtr, config, cset->ac);
3339
3340 if (retcode == 0) {
3341
3342 raidinit(raidPtrs[raidID]);
3343
3344 rf_markalldirty(raidPtrs[raidID]);
3345 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3346 if (cset->ac->clabel->root_partition==1) {
3347 /* everything configured just fine. Make a note
3348 that this set is eligible to be root. */
3349 cset->rootable = 1;
3350 /* XXX do this here? */
3351 raidPtrs[raidID]->root_partition = 1;
3352 }
3353 }
3354
3355 /* 5. Cleanup */
3356 free(config, M_RAIDFRAME);
3357
3358 *unit = raidID;
3359 return(retcode);
3360 }
3361
3362 void
3363 rf_disk_unbusy(desc)
3364 RF_RaidAccessDesc_t *desc;
3365 {
3366 struct buf *bp;
3367
3368 bp = (struct buf *)desc->bp;
3369 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3370 (bp->b_bcount - bp->b_resid));
3371 }
3372