1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.10 2002/06/20 03:46:27 nathanw Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.10 2002/06/20 03:46:27 nathanw Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/lwp.h>
123 #include <sys/proc.h>
124 #include <sys/queue.h>
125 #include <sys/disk.h>
126 #include <sys/device.h>
127 #include <sys/stat.h>
128 #include <sys/ioctl.h>
129 #include <sys/fcntl.h>
130 #include <sys/systm.h>
131 #include <sys/namei.h>
132 #include <sys/vnode.h>
133 #include <sys/disklabel.h>
134 #include <sys/conf.h>
135 #include <sys/lock.h>
136 #include <sys/buf.h>
137 #include <sys/user.h>
138 #include <sys/reboot.h>
139
140 #include <dev/raidframe/raidframevar.h>
141 #include <dev/raidframe/raidframeio.h>
142 #include "raid.h"
143 #include "opt_raid_autoconfig.h"
144 #include "rf_raid.h"
145 #include "rf_copyback.h"
146 #include "rf_dag.h"
147 #include "rf_dagflags.h"
148 #include "rf_desc.h"
149 #include "rf_diskqueue.h"
150 #include "rf_acctrace.h"
151 #include "rf_etimer.h"
152 #include "rf_general.h"
153 #include "rf_debugMem.h"
154 #include "rf_kintf.h"
155 #include "rf_options.h"
156 #include "rf_driver.h"
157 #include "rf_parityscan.h"
158 #include "rf_debugprint.h"
159 #include "rf_threadstuff.h"
160
161 int rf_kdebug_level = 0;
162
163 #ifdef DEBUG
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #else /* DEBUG */
166 #define db1_printf(a) { }
167 #endif /* DEBUG */
168
169 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
170
171 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
172
173 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
174 * spare table */
175 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
176 * installation process */
177
178 /* prototypes */
179 static void KernelWakeupFunc(struct buf * bp);
180 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
181 dev_t dev, RF_SectorNum_t startSect,
182 RF_SectorCount_t numSect, caddr_t buf,
183 void (*cbFunc) (struct buf *), void *cbArg,
184 int logBytesPerSector, struct proc * b_proc);
185 static void raidinit(RF_Raid_t *);
186
187 void raidattach(int);
188 int raidsize(dev_t);
189 int raidopen(dev_t, int, int, struct proc *);
190 int raidclose(dev_t, int, int, struct proc *);
191 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
192 int raidwrite(dev_t, struct uio *, int);
193 int raidread(dev_t, struct uio *, int);
194 void raidstrategy(struct buf *);
195 int raiddump(dev_t, daddr_t, caddr_t, size_t);
196
197 /*
198 * Pilfered from ccd.c
199 */
200
201 struct raidbuf {
202 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
203 struct buf *rf_obp; /* ptr. to original I/O buf */
204 int rf_flags; /* misc. flags */
205 RF_DiskQueueData_t *req;/* the request that this was part of.. */
206 };
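/*
 * Illustrative note: because rf_buf is the first member, a pointer to the
 * embedded struct buf can be cast back to its containing struct raidbuf,
 * which is how KernelWakeupFunc() below recovers its context:
 *
 *	struct raidbuf *raidbp = (struct raidbuf *) vbp;
 *
 * That cast is only safe as long as rf_buf stays the first member.
 */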
207
208 /* component buffer pool */
209 struct pool raidframe_cbufpool;
210
211 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
212 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
213
214 /* XXX Not sure if the following should be replacing the raidPtrs above,
215 or if it should be used in conjunction with that...
216 */
217
218 struct raid_softc {
219 int sc_flags; /* flags */
220 int sc_cflags; /* configuration flags */
221 size_t sc_size; /* size of the raid device */
222 char sc_xname[20]; /* XXX external name */
223 struct disk sc_dkdev; /* generic disk device info */
224 struct buf_queue buf_queue; /* used for the device queue */
225 };
226 /* sc_flags */
227 #define RAIDF_INITED 0x01 /* unit has been initialized */
228 #define RAIDF_WLABEL 0x02 /* label area is writable */
229 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
230 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
231 #define RAIDF_LOCKED 0x80 /* unit is locked */
232
233 #define raidunit(x) DISKUNIT(x)
234 int numraid = 0;
235
236 /*
237 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
238 * Be aware that large numbers can allow the driver to consume a lot of
239 * kernel memory, especially on writes, and in degraded mode reads.
240 *
241 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
242 * a single 64K write will typically require 64K for the old data,
243 * 64K for the old parity, and 64K for the new parity, for a total
244 * of 192K (if the parity buffer is not re-used immediately).
245  * Even if it is used immediately, that's still 128K, which when multiplied
246 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
247 *
248 * Now in degraded mode, for example, a 64K read on the above setup may
249 * require data reconstruction, which will require *all* of the 4 remaining
250 * disks to participate -- 4 * 32K/disk == 128K again.
251 */
252
253 #ifndef RAIDOUTSTANDING
254 #define RAIDOUTSTANDING 6
255 #endif
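/*
 * Worked example for the default: with the 32k-stripe, 5-disk numbers
 * above and RAIDOUTSTANDING == 6, the driver may need roughly
 * 6 * 192K = 1152K of kernel memory for old data/parity buffers
 * (6 * 128K = 768K if the parity buffer is re-used immediately), on top
 * of 6 * 64K = 384K of incoming write data.
 */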
256
257 #define RAIDLABELDEV(dev) \
258 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
259
260 /* declared here, and made public, for the benefit of KVM stuff.. */
261 struct raid_softc *raid_softc;
262
263 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
264 struct disklabel *);
265 static void raidgetdisklabel(dev_t);
266 static void raidmakedisklabel(struct raid_softc *);
267
268 static int raidlock(struct raid_softc *);
269 static void raidunlock(struct raid_softc *);
270
271 static void rf_markalldirty(RF_Raid_t *);
272 void rf_mountroot_hook(struct device *);
273
274 struct device *raidrootdev;
275
276 void rf_ReconThread(struct rf_recon_req *);
277 /* XXX what I want is: */
278 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
280 void rf_CopybackThread(RF_Raid_t *raidPtr);
281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
282 void rf_buildroothack(void *);
283
284 RF_AutoConfig_t *rf_find_raid_components(void);
285 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
286 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
287 static int rf_reasonable_label(RF_ComponentLabel_t *);
288 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
289 int rf_set_autoconfig(RF_Raid_t *, int);
290 int rf_set_rootpartition(RF_Raid_t *, int);
291 void rf_release_all_vps(RF_ConfigSet_t *);
292 void rf_cleanup_config_set(RF_ConfigSet_t *);
293 int rf_have_enough_components(RF_ConfigSet_t *);
294 int rf_auto_config_set(RF_ConfigSet_t *, int *);
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place.
298 Note that this is overridden by having
299 RAID_AUTOCONFIG as an option in the
300 kernel config file. */
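/*
 * For example, autoconfiguration can be forced on by adding
 *
 *	options 	RAID_AUTOCONFIG
 *
 * to the kernel config file (see opt_raid_autoconfig.h, included above).
 */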
301
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 /* Initialize the component buffer pool. */
333 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
334 0, 0, "raidpl", NULL);
335
336 rc = rf_mutex_init(&rf_sparet_wait_mutex);
337 if (rc) {
338 RF_PANIC();
339 }
340
341 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
342
343 for (i = 0; i < num; i++)
344 raidPtrs[i] = NULL;
345 rc = rf_BootRaidframe();
346 if (rc == 0)
347 printf("Kernelized RAIDframe activated\n");
348 else
349 panic("Serious error booting RAID!!\n");
350
351 	/* Put together some data structures like the CCD device does. This
352 	 * lets us lock the device and what-not when it gets opened. */
353
354 raid_softc = (struct raid_softc *)
355 malloc(num * sizeof(struct raid_softc),
356 M_RAIDFRAME, M_NOWAIT);
357 if (raid_softc == NULL) {
358 printf("WARNING: no memory for RAIDframe driver\n");
359 return;
360 }
361
362 memset(raid_softc, 0, num * sizeof(struct raid_softc));
363
364 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raidrootdev == NULL) {
367 panic("No memory for RAIDframe driver!!?!?!\n");
368 }
369
370 for (raidID = 0; raidID < num; raidID++) {
371 BUFQ_INIT(&raid_softc[raidID].buf_queue);
372
373 raidrootdev[raidID].dv_class = DV_DISK;
374 raidrootdev[raidID].dv_cfdata = NULL;
375 raidrootdev[raidID].dv_unit = raidID;
376 raidrootdev[raidID].dv_parent = NULL;
377 raidrootdev[raidID].dv_flags = 0;
378 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
379
380 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
381 (RF_Raid_t *));
382 if (raidPtrs[raidID] == NULL) {
383 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
384 numraid = raidID;
385 return;
386 }
387 }
388
389 #ifdef RAID_AUTOCONFIG
390 raidautoconfig = 1;
391 #endif
392
393 if (raidautoconfig) {
394 /* 1. locate all RAID components on the system */
395
396 #if DEBUG
397 printf("Searching for raid components...\n");
398 #endif
399 ac_list = rf_find_raid_components();
400
401 /* 2. sort them into their respective sets */
402
403 config_sets = rf_create_auto_sets(ac_list);
404
405 /* 3. evaluate each set and configure the valid ones
406 This gets done in rf_buildroothack() */
407
408 /* schedule the creation of the thread to do the
409 "/ on RAID" stuff */
410
411 kthread_create(rf_buildroothack,config_sets);
412
413 #if 0
414 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
415 #endif
416 }
417
418 }
419
420 void
421 rf_buildroothack(arg)
422 void *arg;
423 {
424 RF_ConfigSet_t *config_sets = arg;
425 RF_ConfigSet_t *cset;
426 RF_ConfigSet_t *next_cset;
427 int retcode;
428 int raidID;
429 int rootID;
430 int num_root;
431
432 rootID = 0;
433 num_root = 0;
434 cset = config_sets;
435 while(cset != NULL ) {
436 next_cset = cset->next;
437 if (rf_have_enough_components(cset) &&
438 cset->ac->clabel->autoconfigure==1) {
439 retcode = rf_auto_config_set(cset,&raidID);
440 if (!retcode) {
441 if (cset->rootable) {
442 rootID = raidID;
443 num_root++;
444 }
445 } else {
446 /* The autoconfig didn't work :( */
447 #if DEBUG
448 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
449 #endif
450 rf_release_all_vps(cset);
451 }
452 } else {
453 /* we're not autoconfiguring this set...
454 release the associated resources */
455 rf_release_all_vps(cset);
456 }
457 /* cleanup */
458 rf_cleanup_config_set(cset);
459 cset = next_cset;
460 }
461 if (boothowto & RB_ASKNAME) {
462 /* We don't auto-config... */
463 } else {
464 /* They didn't ask, and we found something bootable... */
465
466 if (num_root == 1) {
467 booted_device = &raidrootdev[rootID];
468 } else if (num_root > 1) {
469 /* we can't guess.. require the user to answer... */
470 boothowto |= RB_ASKNAME;
471 }
472 }
473 }
474
475
476 int
477 raidsize(dev)
478 dev_t dev;
479 {
480 struct raid_softc *rs;
481 struct disklabel *lp;
482 int part, unit, omask, size;
483
484 unit = raidunit(dev);
485 if (unit >= numraid)
486 return (-1);
487 rs = &raid_softc[unit];
488
489 if ((rs->sc_flags & RAIDF_INITED) == 0)
490 return (-1);
491
492 part = DISKPART(dev);
493 omask = rs->sc_dkdev.dk_openmask & (1 << part);
494 lp = rs->sc_dkdev.dk_label;
495
496 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc->l_proc))
497 return (-1);
498
499 if (lp->d_partitions[part].p_fstype != FS_SWAP)
500 size = -1;
501 else
502 size = lp->d_partitions[part].p_size *
503 (lp->d_secsize / DEV_BSIZE);
504
505 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc->l_proc))
506 return (-1);
507
508 return (size);
509
510 }
511
512 int
513 raiddump(dev, blkno, va, size)
514 dev_t dev;
515 daddr_t blkno;
516 caddr_t va;
517 size_t size;
518 {
519 /* Not implemented. */
520 return ENXIO;
521 }
522 /* ARGSUSED */
523 int
524 raidopen(dev, flags, fmt, p)
525 dev_t dev;
526 int flags, fmt;
527 struct proc *p;
528 {
529 int unit = raidunit(dev);
530 struct raid_softc *rs;
531 struct disklabel *lp;
532 int part, pmask;
533 int error = 0;
534
535 if (unit >= numraid)
536 return (ENXIO);
537 rs = &raid_softc[unit];
538
539 if ((error = raidlock(rs)) != 0)
540 return (error);
541 lp = rs->sc_dkdev.dk_label;
542
543 part = DISKPART(dev);
544 pmask = (1 << part);
545
546 db1_printf(("Opening raid device number: %d partition: %d\n",
547 unit, part));
548
549
550 if ((rs->sc_flags & RAIDF_INITED) &&
551 (rs->sc_dkdev.dk_openmask == 0))
552 raidgetdisklabel(dev);
553
554 /* make sure that this partition exists */
555
556 if (part != RAW_PART) {
557 db1_printf(("Not a raw partition..\n"));
558 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
559 ((part >= lp->d_npartitions) ||
560 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
561 error = ENXIO;
562 raidunlock(rs);
563 db1_printf(("Bailing out...\n"));
564 return (error);
565 }
566 }
567 /* Prevent this unit from being unconfigured while open. */
568 switch (fmt) {
569 case S_IFCHR:
570 rs->sc_dkdev.dk_copenmask |= pmask;
571 break;
572
573 case S_IFBLK:
574 rs->sc_dkdev.dk_bopenmask |= pmask;
575 break;
576 }
577
578 if ((rs->sc_dkdev.dk_openmask == 0) &&
579 ((rs->sc_flags & RAIDF_INITED) != 0)) {
580 /* First one... mark things as dirty... Note that we *MUST*
581 have done a configure before this. I DO NOT WANT TO BE
582 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
583 THAT THEY BELONG TOGETHER!!!!! */
584 /* XXX should check to see if we're only open for reading
585 here... If so, we needn't do this, but then need some
586 other way of keeping track of what's happened.. */
587
588 rf_markalldirty( raidPtrs[unit] );
589 }
590
591
592 rs->sc_dkdev.dk_openmask =
593 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
594
595 raidunlock(rs);
596
597 return (error);
598
599
600 }
601 /* ARGSUSED */
602 int
603 raidclose(dev, flags, fmt, p)
604 dev_t dev;
605 int flags, fmt;
606 struct proc *p;
607 {
608 int unit = raidunit(dev);
609 struct raid_softc *rs;
610 int error = 0;
611 int part;
612
613 if (unit >= numraid)
614 return (ENXIO);
615 rs = &raid_softc[unit];
616
617 if ((error = raidlock(rs)) != 0)
618 return (error);
619
620 part = DISKPART(dev);
621
622 /* ...that much closer to allowing unconfiguration... */
623 switch (fmt) {
624 case S_IFCHR:
625 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
626 break;
627
628 case S_IFBLK:
629 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
630 break;
631 }
632 rs->sc_dkdev.dk_openmask =
633 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
634
635 if ((rs->sc_dkdev.dk_openmask == 0) &&
636 ((rs->sc_flags & RAIDF_INITED) != 0)) {
637 		/* Last one... the device is not unconfigured yet.
638 		   Mark things as clean. (If RAIDF_INITED is not set,
639 		   device shutdown has already taken care of setting
640 		   the clean bits.) */
641 #if 0
642 printf("Last one on raid%d. Updating status.\n",unit);
643 #endif
644 rf_update_component_labels(raidPtrs[unit],
645 RF_FINAL_COMPONENT_UPDATE);
646 if (doing_shutdown) {
647 /* last one, and we're going down, so
648 lights out for this RAID set too. */
649 error = rf_Shutdown(raidPtrs[unit]);
650
651 /* It's no longer initialized... */
652 rs->sc_flags &= ~RAIDF_INITED;
653
654 /* Detach the disk. */
655 disk_detach(&rs->sc_dkdev);
656 }
657 }
658
659 raidunlock(rs);
660 return (0);
661
662 }
663
664 void
665 raidstrategy(bp)
666 struct buf *bp;
667 {
668 int s;
669
670 unsigned int raidID = raidunit(bp->b_dev);
671 RF_Raid_t *raidPtr;
672 struct raid_softc *rs = &raid_softc[raidID];
673 struct disklabel *lp;
674 int wlabel;
675
676 if ((rs->sc_flags & RAIDF_INITED) ==0) {
677 bp->b_error = ENXIO;
678 bp->b_flags |= B_ERROR;
679 bp->b_resid = bp->b_bcount;
680 biodone(bp);
681 return;
682 }
683 if (raidID >= numraid || !raidPtrs[raidID]) {
684 bp->b_error = ENODEV;
685 bp->b_flags |= B_ERROR;
686 bp->b_resid = bp->b_bcount;
687 biodone(bp);
688 return;
689 }
690 raidPtr = raidPtrs[raidID];
691 if (!raidPtr->valid) {
692 bp->b_error = ENODEV;
693 bp->b_flags |= B_ERROR;
694 bp->b_resid = bp->b_bcount;
695 biodone(bp);
696 return;
697 }
698 if (bp->b_bcount == 0) {
699 db1_printf(("b_bcount is zero..\n"));
700 biodone(bp);
701 return;
702 }
703 lp = rs->sc_dkdev.dk_label;
704
705 /*
706 * Do bounds checking and adjust transfer. If there's an
707 * error, the bounds check will flag that for us.
708 */
709
710 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
711 if (DISKPART(bp->b_dev) != RAW_PART)
712 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
713 db1_printf(("Bounds check failed!!:%d %d\n",
714 (int) bp->b_blkno, (int) wlabel));
715 biodone(bp);
716 return;
717 }
718 s = splbio();
719
720 bp->b_resid = 0;
721
722 /* stuff it onto our queue */
723 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
724
725 raidstart(raidPtrs[raidID]);
726
727 splx(s);
728 }
729 /* ARGSUSED */
730 int
731 raidread(dev, uio, flags)
732 dev_t dev;
733 struct uio *uio;
734 int flags;
735 {
736 int unit = raidunit(dev);
737 struct raid_softc *rs;
738 int part;
739
740 if (unit >= numraid)
741 return (ENXIO);
742 rs = &raid_softc[unit];
743
744 if ((rs->sc_flags & RAIDF_INITED) == 0)
745 return (ENXIO);
746 part = DISKPART(dev);
747
748 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
749
750 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
751
752 }
753 /* ARGSUSED */
754 int
755 raidwrite(dev, uio, flags)
756 dev_t dev;
757 struct uio *uio;
758 int flags;
759 {
760 int unit = raidunit(dev);
761 struct raid_softc *rs;
762
763 if (unit >= numraid)
764 return (ENXIO);
765 rs = &raid_softc[unit];
766
767 if ((rs->sc_flags & RAIDF_INITED) == 0)
768 return (ENXIO);
769 db1_printf(("raidwrite\n"));
770 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
771
772 }
773
774 int
775 raidioctl(dev, cmd, data, flag, p)
776 dev_t dev;
777 u_long cmd;
778 caddr_t data;
779 int flag;
780 struct proc *p;
781 {
782 int unit = raidunit(dev);
783 int error = 0;
784 int part, pmask;
785 struct raid_softc *rs;
786 RF_Config_t *k_cfg, *u_cfg;
787 RF_Raid_t *raidPtr;
788 RF_RaidDisk_t *diskPtr;
789 RF_AccTotals_t *totals;
790 RF_DeviceConfig_t *d_cfg, **ucfgp;
791 u_char *specific_buf;
792 int retcode = 0;
793 int row;
794 int column;
795 struct rf_recon_req *rrcopy, *rr;
796 RF_ComponentLabel_t *clabel;
797 RF_ComponentLabel_t ci_label;
798 RF_ComponentLabel_t **clabel_ptr;
799 RF_SingleComponent_t *sparePtr,*componentPtr;
800 RF_SingleComponent_t hot_spare;
801 RF_SingleComponent_t component;
802 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
803 int i, j, d;
804 #ifdef __HAVE_OLD_DISKLABEL
805 struct disklabel newlabel;
806 #endif
807
808 if (unit >= numraid)
809 return (ENXIO);
810 rs = &raid_softc[unit];
811 raidPtr = raidPtrs[unit];
812
813 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
814 (int) DISKPART(dev), (int) unit, (int) cmd));
815
816 /* Must be open for writes for these commands... */
817 switch (cmd) {
818 case DIOCSDINFO:
819 case DIOCWDINFO:
820 #ifdef __HAVE_OLD_DISKLABEL
821 case ODIOCWDINFO:
822 case ODIOCSDINFO:
823 #endif
824 case DIOCWLABEL:
825 if ((flag & FWRITE) == 0)
826 return (EBADF);
827 }
828
829 /* Must be initialized for these... */
830 switch (cmd) {
831 case DIOCGDINFO:
832 case DIOCSDINFO:
833 case DIOCWDINFO:
834 #ifdef __HAVE_OLD_DISKLABEL
835 case ODIOCGDINFO:
836 case ODIOCWDINFO:
837 case ODIOCSDINFO:
838 case ODIOCGDEFLABEL:
839 #endif
840 case DIOCGPART:
841 case DIOCWLABEL:
842 case DIOCGDEFLABEL:
843 case RAIDFRAME_SHUTDOWN:
844 case RAIDFRAME_REWRITEPARITY:
845 case RAIDFRAME_GET_INFO:
846 case RAIDFRAME_RESET_ACCTOTALS:
847 case RAIDFRAME_GET_ACCTOTALS:
848 case RAIDFRAME_KEEP_ACCTOTALS:
849 case RAIDFRAME_GET_SIZE:
850 case RAIDFRAME_FAIL_DISK:
851 case RAIDFRAME_COPYBACK:
852 case RAIDFRAME_CHECK_RECON_STATUS:
853 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
854 case RAIDFRAME_GET_COMPONENT_LABEL:
855 case RAIDFRAME_SET_COMPONENT_LABEL:
856 case RAIDFRAME_ADD_HOT_SPARE:
857 case RAIDFRAME_REMOVE_HOT_SPARE:
858 case RAIDFRAME_INIT_LABELS:
859 case RAIDFRAME_REBUILD_IN_PLACE:
860 case RAIDFRAME_CHECK_PARITY:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS:
864 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
865 case RAIDFRAME_SET_AUTOCONFIG:
866 case RAIDFRAME_SET_ROOT:
867 case RAIDFRAME_DELETE_COMPONENT:
868 case RAIDFRAME_INCORPORATE_HOT_SPARE:
869 if ((rs->sc_flags & RAIDF_INITED) == 0)
870 return (ENXIO);
871 }
872
873 switch (cmd) {
874
875 /* configure the system */
876 case RAIDFRAME_CONFIGURE:
877
878 if (raidPtr->valid) {
879 /* There is a valid RAID set running on this unit! */
880 printf("raid%d: Device already configured!\n",unit);
881 return(EINVAL);
882 }
883
884 /* copy-in the configuration information */
885 /* data points to a pointer to the configuration structure */
886
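		/* Illustrative sketch of the expected userland side (not
		   part of the driver; "fd" and "cfg" are hypothetical):

			RF_Config_t cfg, *cfg_p = &cfg;
			... fill in cfg ...
			ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_p);

		   i.e. the ioctl argument is a pointer to a pointer, which
		   is why "data" is dereferenced once before the copyin(). */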
887 u_cfg = *((RF_Config_t **) data);
888 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
889 if (k_cfg == NULL) {
890 return (ENOMEM);
891 }
892 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
893 sizeof(RF_Config_t));
894 if (retcode) {
895 RF_Free(k_cfg, sizeof(RF_Config_t));
896 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
897 retcode));
898 return (retcode);
899 }
900 /* allocate a buffer for the layout-specific data, and copy it
901 * in */
902 if (k_cfg->layoutSpecificSize) {
903 if (k_cfg->layoutSpecificSize > 10000) {
904 /* sanity check */
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (EINVAL);
907 }
908 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
909 (u_char *));
910 if (specific_buf == NULL) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 return (ENOMEM);
913 }
914 retcode = copyin(k_cfg->layoutSpecific,
915 (caddr_t) specific_buf,
916 k_cfg->layoutSpecificSize);
917 if (retcode) {
918 RF_Free(k_cfg, sizeof(RF_Config_t));
919 RF_Free(specific_buf,
920 k_cfg->layoutSpecificSize);
921 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
922 retcode));
923 return (retcode);
924 }
925 } else
926 specific_buf = NULL;
927 k_cfg->layoutSpecific = specific_buf;
928
929 /* should do some kind of sanity check on the configuration.
930 * Store the sum of all the bytes in the last byte? */
931
932 /* configure the system */
933
934 /*
935 * Clear the entire RAID descriptor, just to make sure
936 * there is no stale data left in the case of a
937 * reconfiguration
938 */
939 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
940 raidPtr->raidid = unit;
941
942 retcode = rf_Configure(raidPtr, k_cfg, NULL);
943
944 if (retcode == 0) {
945
946 /* allow this many simultaneous IO's to
947 this RAID device */
948 raidPtr->openings = RAIDOUTSTANDING;
949
950 raidinit(raidPtr);
951 rf_markalldirty(raidPtr);
952 }
953 /* free the buffers. No return code here. */
954 if (k_cfg->layoutSpecificSize) {
955 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
956 }
957 RF_Free(k_cfg, sizeof(RF_Config_t));
958
959 return (retcode);
960
961 /* shutdown the system */
962 case RAIDFRAME_SHUTDOWN:
963
964 if ((error = raidlock(rs)) != 0)
965 return (error);
966
967 /*
968 * If somebody has a partition mounted, we shouldn't
969 * shutdown.
970 */
971
972 part = DISKPART(dev);
973 pmask = (1 << part);
974 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
975 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
976 (rs->sc_dkdev.dk_copenmask & pmask))) {
977 raidunlock(rs);
978 return (EBUSY);
979 }
980
981 retcode = rf_Shutdown(raidPtr);
982
983 /* It's no longer initialized... */
984 rs->sc_flags &= ~RAIDF_INITED;
985
986 /* Detach the disk. */
987 disk_detach(&rs->sc_dkdev);
988
989 raidunlock(rs);
990
991 return (retcode);
992 case RAIDFRAME_GET_COMPONENT_LABEL:
993 clabel_ptr = (RF_ComponentLabel_t **) data;
994 /* need to read the component label for the disk indicated
995 by row,column in clabel */
996
997 		/* For practice, let's get it directly from disk, rather
998 than from the in-core copy */
999 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1000 (RF_ComponentLabel_t *));
1001 if (clabel == NULL)
1002 return (ENOMEM);
1003
1004 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1005
1006 retcode = copyin( *clabel_ptr, clabel,
1007 sizeof(RF_ComponentLabel_t));
1008
1009 if (retcode) {
1010 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1011 return(retcode);
1012 }
1013
1014 row = clabel->row;
1015 column = clabel->column;
1016
1017 if ((row < 0) || (row >= raidPtr->numRow) ||
1018 (column < 0) || (column >= raidPtr->numCol +
1019 raidPtr->numSpare)) {
1020 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1021 return(EINVAL);
1022 }
1023
1024 raidread_component_label(raidPtr->Disks[row][column].dev,
1025 raidPtr->raid_cinfo[row][column].ci_vp,
1026 clabel );
1027
1028 retcode = copyout((caddr_t) clabel,
1029 (caddr_t) *clabel_ptr,
1030 sizeof(RF_ComponentLabel_t));
1031 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1032 return (retcode);
1033
1034 case RAIDFRAME_SET_COMPONENT_LABEL:
1035 clabel = (RF_ComponentLabel_t *) data;
1036
1037 /* XXX check the label for valid stuff... */
1038 /* Note that some things *should not* get modified --
1039 the user should be re-initing the labels instead of
1040 trying to patch things.
1041 */
1042
1043 printf("Got component label:\n");
1044 printf("Version: %d\n",clabel->version);
1045 printf("Serial Number: %d\n",clabel->serial_number);
1046 printf("Mod counter: %d\n",clabel->mod_counter);
1047 printf("Row: %d\n", clabel->row);
1048 printf("Column: %d\n", clabel->column);
1049 printf("Num Rows: %d\n", clabel->num_rows);
1050 printf("Num Columns: %d\n", clabel->num_columns);
1051 printf("Clean: %d\n", clabel->clean);
1052 printf("Status: %d\n", clabel->status);
1053
1054 row = clabel->row;
1055 column = clabel->column;
1056
1057 if ((row < 0) || (row >= raidPtr->numRow) ||
1058 (column < 0) || (column >= raidPtr->numCol)) {
1059 return(EINVAL);
1060 }
1061
1062 /* XXX this isn't allowed to do anything for now :-) */
1063
1064 /* XXX and before it is, we need to fill in the rest
1065 of the fields!?!?!?! */
1066 #if 0
1067 raidwrite_component_label(
1068 raidPtr->Disks[row][column].dev,
1069 raidPtr->raid_cinfo[row][column].ci_vp,
1070 clabel );
1071 #endif
1072 return (0);
1073
1074 case RAIDFRAME_INIT_LABELS:
1075 clabel = (RF_ComponentLabel_t *) data;
1076 /*
1077 we only want the serial number from
1078 the above. We get all the rest of the information
1079 from the config that was used to create this RAID
1080 set.
1081 */
1082
1083 raidPtr->serial_number = clabel->serial_number;
1084
1085 raid_init_component_label(raidPtr, &ci_label);
1086 ci_label.serial_number = clabel->serial_number;
1087
1088 for(row=0;row<raidPtr->numRow;row++) {
1089 ci_label.row = row;
1090 for(column=0;column<raidPtr->numCol;column++) {
1091 diskPtr = &raidPtr->Disks[row][column];
1092 if (!RF_DEAD_DISK(diskPtr->status)) {
1093 ci_label.partitionSize = diskPtr->partitionSize;
1094 ci_label.column = column;
1095 raidwrite_component_label(
1096 raidPtr->Disks[row][column].dev,
1097 raidPtr->raid_cinfo[row][column].ci_vp,
1098 &ci_label );
1099 }
1100 }
1101 }
1102
1103 return (retcode);
1104 case RAIDFRAME_SET_AUTOCONFIG:
1105 d = rf_set_autoconfig(raidPtr, *(int *) data);
1106 printf("New autoconfig value is: %d\n", d);
1107 *(int *) data = d;
1108 return (retcode);
1109
1110 case RAIDFRAME_SET_ROOT:
1111 d = rf_set_rootpartition(raidPtr, *(int *) data);
1112 printf("New rootpartition value is: %d\n", d);
1113 *(int *) data = d;
1114 return (retcode);
1115
1116 /* initialize all parity */
1117 case RAIDFRAME_REWRITEPARITY:
1118
1119 if (raidPtr->Layout.map->faultsTolerated == 0) {
1120 /* Parity for RAID 0 is trivially correct */
1121 raidPtr->parity_good = RF_RAID_CLEAN;
1122 return(0);
1123 }
1124
1125 if (raidPtr->parity_rewrite_in_progress == 1) {
1126 /* Re-write is already in progress! */
1127 return(EINVAL);
1128 }
1129
1130 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1131 rf_RewriteParityThread,
1132 raidPtr,"raid_parity");
1133 return (retcode);
1134
1135
1136 case RAIDFRAME_ADD_HOT_SPARE:
1137 sparePtr = (RF_SingleComponent_t *) data;
1138 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1139 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1140 return(retcode);
1141
1142 case RAIDFRAME_REMOVE_HOT_SPARE:
1143 return(retcode);
1144
1145 case RAIDFRAME_DELETE_COMPONENT:
1146 componentPtr = (RF_SingleComponent_t *)data;
1147 memcpy( &component, componentPtr,
1148 sizeof(RF_SingleComponent_t));
1149 retcode = rf_delete_component(raidPtr, &component);
1150 return(retcode);
1151
1152 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1153 componentPtr = (RF_SingleComponent_t *)data;
1154 memcpy( &component, componentPtr,
1155 sizeof(RF_SingleComponent_t));
1156 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1157 return(retcode);
1158
1159 case RAIDFRAME_REBUILD_IN_PLACE:
1160
1161 if (raidPtr->Layout.map->faultsTolerated == 0) {
1162 /* Can't do this on a RAID 0!! */
1163 return(EINVAL);
1164 }
1165
1166 if (raidPtr->recon_in_progress == 1) {
1167 /* a reconstruct is already in progress! */
1168 return(EINVAL);
1169 }
1170
1171 componentPtr = (RF_SingleComponent_t *) data;
1172 memcpy( &component, componentPtr,
1173 sizeof(RF_SingleComponent_t));
1174 row = component.row;
1175 column = component.column;
1176 printf("Rebuild: %d %d\n",row, column);
1177 if ((row < 0) || (row >= raidPtr->numRow) ||
1178 (column < 0) || (column >= raidPtr->numCol)) {
1179 return(EINVAL);
1180 }
1181
1182 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1183 if (rrcopy == NULL)
1184 return(ENOMEM);
1185
1186 rrcopy->raidPtr = (void *) raidPtr;
1187 rrcopy->row = row;
1188 rrcopy->col = column;
1189
1190 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1191 rf_ReconstructInPlaceThread,
1192 rrcopy,"raid_reconip");
1193 return(retcode);
1194
1195 case RAIDFRAME_GET_INFO:
1196 if (!raidPtr->valid)
1197 return (ENODEV);
1198 ucfgp = (RF_DeviceConfig_t **) data;
1199 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1200 (RF_DeviceConfig_t *));
1201 if (d_cfg == NULL)
1202 return (ENOMEM);
1203 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1204 d_cfg->rows = raidPtr->numRow;
1205 d_cfg->cols = raidPtr->numCol;
1206 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1207 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1208 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1209 return (ENOMEM);
1210 }
1211 d_cfg->nspares = raidPtr->numSpare;
1212 if (d_cfg->nspares >= RF_MAX_DISKS) {
1213 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1214 return (ENOMEM);
1215 }
1216 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1217 d = 0;
1218 for (i = 0; i < d_cfg->rows; i++) {
1219 for (j = 0; j < d_cfg->cols; j++) {
1220 d_cfg->devs[d] = raidPtr->Disks[i][j];
1221 d++;
1222 }
1223 }
1224 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1225 d_cfg->spares[i] = raidPtr->Disks[0][j];
1226 }
1227 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1228 sizeof(RF_DeviceConfig_t));
1229 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1230
1231 return (retcode);
1232
1233 case RAIDFRAME_CHECK_PARITY:
1234 *(int *) data = raidPtr->parity_good;
1235 return (0);
1236
1237 case RAIDFRAME_RESET_ACCTOTALS:
1238 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1239 return (0);
1240
1241 case RAIDFRAME_GET_ACCTOTALS:
1242 totals = (RF_AccTotals_t *) data;
1243 *totals = raidPtr->acc_totals;
1244 return (0);
1245
1246 case RAIDFRAME_KEEP_ACCTOTALS:
1247 raidPtr->keep_acc_totals = *(int *)data;
1248 return (0);
1249
1250 case RAIDFRAME_GET_SIZE:
1251 *(int *) data = raidPtr->totalSectors;
1252 return (0);
1253
1254 /* fail a disk & optionally start reconstruction */
1255 case RAIDFRAME_FAIL_DISK:
1256
1257 if (raidPtr->Layout.map->faultsTolerated == 0) {
1258 /* Can't do this on a RAID 0!! */
1259 return(EINVAL);
1260 }
1261
1262 rr = (struct rf_recon_req *) data;
1263
1264 if (rr->row < 0 || rr->row >= raidPtr->numRow
1265 || rr->col < 0 || rr->col >= raidPtr->numCol)
1266 return (EINVAL);
1267
1268 printf("raid%d: Failing the disk: row: %d col: %d\n",
1269 unit, rr->row, rr->col);
1270
1271 /* make a copy of the recon request so that we don't rely on
1272 * the user's buffer */
1273 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1274 if (rrcopy == NULL)
1275 return(ENOMEM);
1276 memcpy(rrcopy, rr, sizeof(*rr));
1277 rrcopy->raidPtr = (void *) raidPtr;
1278
1279 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1280 rf_ReconThread,
1281 rrcopy,"raid_recon");
1282 return (0);
1283
1284 /* invoke a copyback operation after recon on whatever disk
1285 * needs it, if any */
1286 case RAIDFRAME_COPYBACK:
1287
1288 if (raidPtr->Layout.map->faultsTolerated == 0) {
1289 /* This makes no sense on a RAID 0!! */
1290 return(EINVAL);
1291 }
1292
1293 if (raidPtr->copyback_in_progress == 1) {
1294 /* Copyback is already in progress! */
1295 return(EINVAL);
1296 }
1297
1298 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1299 rf_CopybackThread,
1300 raidPtr,"raid_copyback");
1301 return (retcode);
1302
1303 /* return the percentage completion of reconstruction */
1304 case RAIDFRAME_CHECK_RECON_STATUS:
1305 if (raidPtr->Layout.map->faultsTolerated == 0) {
1306 /* This makes no sense on a RAID 0, so tell the
1307 user it's done. */
1308 *(int *) data = 100;
1309 return(0);
1310 }
1311 row = 0; /* XXX we only consider a single row... */
1312 if (raidPtr->status[row] != rf_rs_reconstructing)
1313 *(int *) data = 100;
1314 else
1315 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1316 return (0);
1317 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1318 progressInfoPtr = (RF_ProgressInfo_t **) data;
1319 row = 0; /* XXX we only consider a single row... */
1320 if (raidPtr->status[row] != rf_rs_reconstructing) {
1321 progressInfo.remaining = 0;
1322 progressInfo.completed = 100;
1323 progressInfo.total = 100;
1324 } else {
1325 progressInfo.total =
1326 raidPtr->reconControl[row]->numRUsTotal;
1327 progressInfo.completed =
1328 raidPtr->reconControl[row]->numRUsComplete;
1329 progressInfo.remaining = progressInfo.total -
1330 progressInfo.completed;
1331 }
1332 retcode = copyout((caddr_t) &progressInfo,
1333 (caddr_t) *progressInfoPtr,
1334 sizeof(RF_ProgressInfo_t));
1335 return (retcode);
1336
1337 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1338 if (raidPtr->Layout.map->faultsTolerated == 0) {
1339 /* This makes no sense on a RAID 0, so tell the
1340 user it's done. */
1341 *(int *) data = 100;
1342 return(0);
1343 }
1344 if (raidPtr->parity_rewrite_in_progress == 1) {
1345 *(int *) data = 100 *
1346 raidPtr->parity_rewrite_stripes_done /
1347 raidPtr->Layout.numStripe;
1348 } else {
1349 *(int *) data = 100;
1350 }
1351 return (0);
1352
1353 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1354 progressInfoPtr = (RF_ProgressInfo_t **) data;
1355 if (raidPtr->parity_rewrite_in_progress == 1) {
1356 progressInfo.total = raidPtr->Layout.numStripe;
1357 progressInfo.completed =
1358 raidPtr->parity_rewrite_stripes_done;
1359 progressInfo.remaining = progressInfo.total -
1360 progressInfo.completed;
1361 } else {
1362 progressInfo.remaining = 0;
1363 progressInfo.completed = 100;
1364 progressInfo.total = 100;
1365 }
1366 retcode = copyout((caddr_t) &progressInfo,
1367 (caddr_t) *progressInfoPtr,
1368 sizeof(RF_ProgressInfo_t));
1369 return (retcode);
1370
1371 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1372 if (raidPtr->Layout.map->faultsTolerated == 0) {
1373 /* This makes no sense on a RAID 0 */
1374 *(int *) data = 100;
1375 return(0);
1376 }
1377 if (raidPtr->copyback_in_progress == 1) {
1378 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1379 raidPtr->Layout.numStripe;
1380 } else {
1381 *(int *) data = 100;
1382 }
1383 return (0);
1384
1385 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1386 progressInfoPtr = (RF_ProgressInfo_t **) data;
1387 if (raidPtr->copyback_in_progress == 1) {
1388 progressInfo.total = raidPtr->Layout.numStripe;
1389 progressInfo.completed =
1390 raidPtr->copyback_stripes_done;
1391 progressInfo.remaining = progressInfo.total -
1392 progressInfo.completed;
1393 } else {
1394 progressInfo.remaining = 0;
1395 progressInfo.completed = 100;
1396 progressInfo.total = 100;
1397 }
1398 retcode = copyout((caddr_t) &progressInfo,
1399 (caddr_t) *progressInfoPtr,
1400 sizeof(RF_ProgressInfo_t));
1401 return (retcode);
1402
1403 /* the sparetable daemon calls this to wait for the kernel to
1404 * need a spare table. this ioctl does not return until a
1405 * spare table is needed. XXX -- calling mpsleep here in the
1406 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1407 * -- I should either compute the spare table in the kernel,
1408 * or have a different -- XXX XXX -- interface (a different
1409 * character device) for delivering the table -- XXX */
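	/* Illustrative sketch only (the code below is compiled out, and the
	 * user-level daemon is hypothetical). Such a daemon would loop
	 * roughly as:
	 *
	 *	RF_SparetWait_t waitreq;
	 *	void *table;
	 *
	 *	for (;;) {
	 *		if (ioctl(fd, RAIDFRAME_SPARET_WAIT, &waitreq) != 0 ||
	 *		    waitreq.fcol == -1)
	 *			break;
	 *		table = compute_spare_table(&waitreq);  (hypothetical)
	 *		ioctl(fd, RAIDFRAME_SEND_SPARET, &table);
	 *	}
	 *
	 * RAIDFRAME_ABORT_SPARET_WAIT hands the sleeping daemon an entry
	 * with fcol == -1, which is its cue to exit. */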
1410 #if 0
1411 case RAIDFRAME_SPARET_WAIT:
1412 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1413 while (!rf_sparet_wait_queue)
1414 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1415 waitreq = rf_sparet_wait_queue;
1416 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1417 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1418
1419 /* structure assignment */
1420 *((RF_SparetWait_t *) data) = *waitreq;
1421
1422 RF_Free(waitreq, sizeof(*waitreq));
1423 return (0);
1424
1425 /* wakes up a process waiting on SPARET_WAIT and puts an error
1426 	 * code in it that will cause the daemon to exit */
1427 case RAIDFRAME_ABORT_SPARET_WAIT:
1428 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1429 waitreq->fcol = -1;
1430 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1431 waitreq->next = rf_sparet_wait_queue;
1432 rf_sparet_wait_queue = waitreq;
1433 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1434 wakeup(&rf_sparet_wait_queue);
1435 return (0);
1436
1437 /* used by the spare table daemon to deliver a spare table
1438 * into the kernel */
1439 case RAIDFRAME_SEND_SPARET:
1440
1441 /* install the spare table */
1442 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1443
1444 /* respond to the requestor. the return status of the spare
1445 * table installation is passed in the "fcol" field */
1446 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1447 waitreq->fcol = retcode;
1448 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1449 waitreq->next = rf_sparet_resp_queue;
1450 rf_sparet_resp_queue = waitreq;
1451 wakeup(&rf_sparet_resp_queue);
1452 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1453
1454 return (retcode);
1455 #endif
1456
1457 default:
1458 break; /* fall through to the os-specific code below */
1459
1460 }
1461
1462 if (!raidPtr->valid)
1463 return (EINVAL);
1464
1465 /*
1466 * Add support for "regular" device ioctls here.
1467 */
1468
1469 switch (cmd) {
1470 case DIOCGDINFO:
1471 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1472 break;
1473 #ifdef __HAVE_OLD_DISKLABEL
1474 case ODIOCGDINFO:
1475 newlabel = *(rs->sc_dkdev.dk_label);
1476 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1477 return ENOTTY;
1478 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1479 break;
1480 #endif
1481
1482 case DIOCGPART:
1483 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1484 ((struct partinfo *) data)->part =
1485 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1486 break;
1487
1488 case DIOCWDINFO:
1489 case DIOCSDINFO:
1490 #ifdef __HAVE_OLD_DISKLABEL
1491 case ODIOCWDINFO:
1492 case ODIOCSDINFO:
1493 #endif
1494 {
1495 struct disklabel *lp;
1496 #ifdef __HAVE_OLD_DISKLABEL
1497 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1498 memset(&newlabel, 0, sizeof newlabel);
1499 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1500 lp = &newlabel;
1501 } else
1502 #endif
1503 lp = (struct disklabel *)data;
1504
1505 if ((error = raidlock(rs)) != 0)
1506 return (error);
1507
1508 rs->sc_flags |= RAIDF_LABELLING;
1509
1510 error = setdisklabel(rs->sc_dkdev.dk_label,
1511 lp, 0, rs->sc_dkdev.dk_cpulabel);
1512 if (error == 0) {
1513 if (cmd == DIOCWDINFO
1514 #ifdef __HAVE_OLD_DISKLABEL
1515 || cmd == ODIOCWDINFO
1516 #endif
1517 )
1518 error = writedisklabel(RAIDLABELDEV(dev),
1519 raidstrategy, rs->sc_dkdev.dk_label,
1520 rs->sc_dkdev.dk_cpulabel);
1521 }
1522 rs->sc_flags &= ~RAIDF_LABELLING;
1523
1524 raidunlock(rs);
1525
1526 if (error)
1527 return (error);
1528 break;
1529 }
1530
1531 case DIOCWLABEL:
1532 if (*(int *) data != 0)
1533 rs->sc_flags |= RAIDF_WLABEL;
1534 else
1535 rs->sc_flags &= ~RAIDF_WLABEL;
1536 break;
1537
1538 case DIOCGDEFLABEL:
1539 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1540 break;
1541
1542 #ifdef __HAVE_OLD_DISKLABEL
1543 case ODIOCGDEFLABEL:
1544 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1545 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1546 return ENOTTY;
1547 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1548 break;
1549 #endif
1550
1551 default:
1552 retcode = ENOTTY;
1553 }
1554 return (retcode);
1555
1556 }
1557
1558
1559 /* raidinit -- complete the rest of the initialization for the
1560 RAIDframe device. */
1561
1562
1563 static void
1564 raidinit(raidPtr)
1565 RF_Raid_t *raidPtr;
1566 {
1567 struct raid_softc *rs;
1568 int unit;
1569
1570 unit = raidPtr->raidid;
1571
1572 rs = &raid_softc[unit];
1573
1574 /* XXX should check return code first... */
1575 rs->sc_flags |= RAIDF_INITED;
1576
1577 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1578
1579 rs->sc_dkdev.dk_name = rs->sc_xname;
1580
1581 /* disk_attach actually creates space for the CPU disklabel, among
1582 * other things, so it's critical to call this *BEFORE* we try putzing
1583 * with disklabels. */
1584
1585 disk_attach(&rs->sc_dkdev);
1586
1587 /* XXX There may be a weird interaction here between this, and
1588 * protectedSectors, as used in RAIDframe. */
1589
1590 rs->sc_size = raidPtr->totalSectors;
1591
1592 }
1593
1594 /* wake up the daemon & tell it to get us a spare table
1595 * XXX
1596 * the entries in the queues should be tagged with the raidPtr
1597 * so that in the extremely rare case that two recons happen at once,
1598  * we know for which device we're requesting a spare table
1599 * XXX
1600 *
1601 * XXX This code is not currently used. GO
1602 */
1603 int
1604 rf_GetSpareTableFromDaemon(req)
1605 RF_SparetWait_t *req;
1606 {
1607 int retcode;
1608
1609 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1610 req->next = rf_sparet_wait_queue;
1611 rf_sparet_wait_queue = req;
1612 wakeup(&rf_sparet_wait_queue);
1613
1614 /* mpsleep unlocks the mutex */
1615 while (!rf_sparet_resp_queue) {
1616 tsleep(&rf_sparet_resp_queue, PRIBIO,
1617 "raidframe getsparetable", 0);
1618 }
1619 req = rf_sparet_resp_queue;
1620 rf_sparet_resp_queue = req->next;
1621 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1622
1623 retcode = req->fcol;
1624 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1625 * alloc'd */
1626 return (retcode);
1627 }
1628
1629 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1630 * bp & passes it down.
1631  * any calls originating in the kernel must use non-blocking I/O.
1632  * We also do some extra sanity checking to return "appropriate" error
1633  * values for certain conditions (to make some standard utilities work).
1634 *
1635 * Formerly known as: rf_DoAccessKernel
1636 */
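/*
 * In outline, for each buf taken off the queue this routine maps:
 *	bp->b_blkno (+ partition offset)     -> raid_addr
 *	bp->b_bcount >> logBytesPerSector    -> num_blocks
 *	bp->b_flags & B_READ                 -> RF_IO_TYPE_READ/WRITE
 * and hands those to rf_DoAccess() with RF_DAG_NONBLOCKING_IO.
 */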
1637 void
1638 raidstart(raidPtr)
1639 RF_Raid_t *raidPtr;
1640 {
1641 RF_SectorCount_t num_blocks, pb, sum;
1642 RF_RaidAddr_t raid_addr;
1643 int retcode;
1644 struct partition *pp;
1645 daddr_t blocknum;
1646 int unit;
1647 struct raid_softc *rs;
1648 int do_async;
1649 struct buf *bp;
1650
1651 unit = raidPtr->raidid;
1652 rs = &raid_softc[unit];
1653
1654 /* quick check to see if anything has died recently */
1655 RF_LOCK_MUTEX(raidPtr->mutex);
1656 if (raidPtr->numNewFailures > 0) {
1657 rf_update_component_labels(raidPtr,
1658 RF_NORMAL_COMPONENT_UPDATE);
1659 raidPtr->numNewFailures--;
1660 }
1661 RF_UNLOCK_MUTEX(raidPtr->mutex);
1662
1663 /* Check to see if we're at the limit... */
1664 RF_LOCK_MUTEX(raidPtr->mutex);
1665 while (raidPtr->openings > 0) {
1666 RF_UNLOCK_MUTEX(raidPtr->mutex);
1667
1668 /* get the next item, if any, from the queue */
1669 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1670 /* nothing more to do */
1671 return;
1672 }
1673 BUFQ_REMOVE(&rs->buf_queue, bp);
1674
1675 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1676 * partition.. Need to make it absolute to the underlying
1677 * device.. */
1678
1679 blocknum = bp->b_blkno;
1680 if (DISKPART(bp->b_dev) != RAW_PART) {
1681 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1682 blocknum += pp->p_offset;
1683 }
1684
1685 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1686 (int) blocknum));
1687
1688 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1689 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1690
1691 /* *THIS* is where we adjust what block we're going to...
1692 * but DO NOT TOUCH bp->b_blkno!!! */
1693 raid_addr = blocknum;
1694
1695 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1696 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1697 sum = raid_addr + num_blocks + pb;
1698 if (1 || rf_debugKernelAccess) {
1699 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1700 (int) raid_addr, (int) sum, (int) num_blocks,
1701 (int) pb, (int) bp->b_resid));
1702 }
1703 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1704 || (sum < num_blocks) || (sum < pb)) {
1705 bp->b_error = ENOSPC;
1706 bp->b_flags |= B_ERROR;
1707 bp->b_resid = bp->b_bcount;
1708 biodone(bp);
1709 RF_LOCK_MUTEX(raidPtr->mutex);
1710 continue;
1711 }
1712 /*
1713 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1714 */
1715
1716 if (bp->b_bcount & raidPtr->sectorMask) {
1717 bp->b_error = EINVAL;
1718 bp->b_flags |= B_ERROR;
1719 bp->b_resid = bp->b_bcount;
1720 biodone(bp);
1721 RF_LOCK_MUTEX(raidPtr->mutex);
1722 continue;
1723
1724 }
1725 db1_printf(("Calling DoAccess..\n"));
1726
1727
1728 RF_LOCK_MUTEX(raidPtr->mutex);
1729 raidPtr->openings--;
1730 RF_UNLOCK_MUTEX(raidPtr->mutex);
1731
1732 /*
1733 * Everything is async.
1734 */
1735 do_async = 1;
1736
1737 disk_busy(&rs->sc_dkdev);
1738
1739 /* XXX we're still at splbio() here... do we *really*
1740 need to be? */
1741
1742 /* don't ever condition on bp->b_flags & B_WRITE.
1743 * always condition on B_READ instead */
1744
1745 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1746 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1747 do_async, raid_addr, num_blocks,
1748 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1749
1750 RF_LOCK_MUTEX(raidPtr->mutex);
1751 }
1752 RF_UNLOCK_MUTEX(raidPtr->mutex);
1753 }
1754
1755
1756
1757
1758 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1759
1760 int
1761 rf_DispatchKernelIO(queue, req)
1762 RF_DiskQueue_t *queue;
1763 RF_DiskQueueData_t *req;
1764 {
1765 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1766 struct buf *bp;
1767 struct raidbuf *raidbp = NULL;
1768 struct raid_softc *rs;
1769 int unit;
1770 int s;
1771
1772 s=0;
1773 /* s = splbio();*/ /* want to test this */
1774 /* XXX along with the vnode, we also need the softc associated with
1775 * this device.. */
1776
1777 req->queue = queue;
1778
1779 unit = queue->raidPtr->raidid;
1780
1781 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1782
1783 if (unit >= numraid) {
1784 printf("Invalid unit number: %d %d\n", unit, numraid);
1785 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1786 }
1787 rs = &raid_softc[unit];
1788
1789 bp = req->bp;
1790 #if 1
1791 /* XXX when there is a physical disk failure, someone is passing us a
1792 * buffer that contains old stuff!! Attempt to deal with this problem
1793 * without taking a performance hit... (not sure where the real bug
1794 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1795
1796 if (bp->b_flags & B_ERROR) {
1797 bp->b_flags &= ~B_ERROR;
1798 }
1799 if (bp->b_error != 0) {
1800 bp->b_error = 0;
1801 }
1802 #endif
1803 raidbp = RAIDGETBUF(rs);
1804
1805 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1806
1807 /*
1808 * context for raidiodone
1809 */
1810 raidbp->rf_obp = bp;
1811 raidbp->req = req;
1812
1813 LIST_INIT(&raidbp->rf_buf.b_dep);
1814
1815 switch (req->type) {
1816 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1817 /* XXX need to do something extra here.. */
1818 /* I'm leaving this in, as I've never actually seen it used,
1819 * and I'd like folks to report it... GO */
1820 printf(("WAKEUP CALLED\n"));
1821 queue->numOutstanding++;
1822
1823 /* XXX need to glue the original buffer into this?? */
1824
1825 KernelWakeupFunc(&raidbp->rf_buf);
1826 break;
1827
1828 case RF_IO_TYPE_READ:
1829 case RF_IO_TYPE_WRITE:
1830
1831 if (req->tracerec) {
1832 RF_ETIMER_START(req->tracerec->timer);
1833 }
1834 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1835 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1836 req->sectorOffset, req->numSector,
1837 req->buf, KernelWakeupFunc, (void *) req,
1838 queue->raidPtr->logBytesPerSector, req->b_proc);
1839
1840 if (rf_debugKernelAccess) {
1841 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1842 (long) bp->b_blkno));
1843 }
1844 queue->numOutstanding++;
1845 queue->last_deq_sector = req->sectorOffset;
1846 /* acc wouldn't have been let in if there were any pending
1847 * reqs at any other priority */
1848 queue->curPriority = req->priority;
1849
1850 db1_printf(("Going for %c to unit %d row %d col %d\n",
1851 req->type, unit, queue->row, queue->col));
1852 db1_printf(("sector %d count %d (%d bytes) %d\n",
1853 (int) req->sectorOffset, (int) req->numSector,
1854 (int) (req->numSector <<
1855 queue->raidPtr->logBytesPerSector),
1856 (int) queue->raidPtr->logBytesPerSector));
1857 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1858 raidbp->rf_buf.b_vp->v_numoutput++;
1859 }
1860 VOP_STRATEGY(&raidbp->rf_buf);
1861
1862 break;
1863
1864 default:
1865 panic("bad req->type in rf_DispatchKernelIO");
1866 }
1867 db1_printf(("Exiting from DispatchKernelIO\n"));
1868 /* splx(s); */ /* want to test this */
1869 return (0);
1870 }
1871 /* this is the callback function associated with an I/O invoked from
1872 kernel code.
1873 */
1874 static void
1875 KernelWakeupFunc(vbp)
1876 struct buf *vbp;
1877 {
1878 RF_DiskQueueData_t *req = NULL;
1879 RF_DiskQueue_t *queue;
1880 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1881 struct buf *bp;
1882 struct raid_softc *rs;
1883 int unit;
1884 int s;
1885
1886 s = splbio();
1887 db1_printf(("recovering the request queue:\n"));
1888 req = raidbp->req;
1889
1890 bp = raidbp->rf_obp;
1891
1892 queue = (RF_DiskQueue_t *) req->queue;
1893
1894 if (raidbp->rf_buf.b_flags & B_ERROR) {
1895 bp->b_flags |= B_ERROR;
1896 bp->b_error = raidbp->rf_buf.b_error ?
1897 raidbp->rf_buf.b_error : EIO;
1898 }
1899
1900 /* XXX methinks this could be wrong... */
1901 #if 1
1902 bp->b_resid = raidbp->rf_buf.b_resid;
1903 #endif
1904
1905 if (req->tracerec) {
1906 RF_ETIMER_STOP(req->tracerec->timer);
1907 RF_ETIMER_EVAL(req->tracerec->timer);
1908 RF_LOCK_MUTEX(rf_tracing_mutex);
1909 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1910 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1911 req->tracerec->num_phys_ios++;
1912 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1913 }
1914 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1915
1916 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1917
1918
1919 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1920 * ballistic, and mark the component as hosed... */
1921
1922 if (bp->b_flags & B_ERROR) {
1923 /* Mark the disk as dead */
1924 /* but only mark it once... */
1925 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1926 rf_ds_optimal) {
1927 printf("raid%d: IO Error. Marking %s as failed.\n",
1928 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1929 queue->raidPtr->Disks[queue->row][queue->col].status =
1930 rf_ds_failed;
1931 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1932 queue->raidPtr->numFailures++;
1933 queue->raidPtr->numNewFailures++;
1934 } else { /* Disk is already dead... */
1935 /* printf("Disk already marked as dead!\n"); */
1936 }
1937
1938 }
1939
1940 rs = &raid_softc[unit];
1941 RAIDPUTBUF(rs, raidbp);
1942
1943 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1944 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1945
1946 splx(s);
1947 }
1948
1949
1950
1951 /*
1952 * initialize a buf structure for doing an I/O in the kernel.
1953 */
1954 static void
1955 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1956 logBytesPerSector, b_proc)
1957 struct buf *bp;
1958 struct vnode *b_vp;
1959 unsigned rw_flag;
1960 dev_t dev;
1961 RF_SectorNum_t startSect;
1962 RF_SectorCount_t numSect;
1963 caddr_t buf;
1964 void (*cbFunc) (struct buf *);
1965 void *cbArg;
1966 int logBytesPerSector;
1967 struct proc *b_proc;
1968 {
1969 /* bp->b_flags = B_PHYS | rw_flag; */
1970 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1971 bp->b_bcount = numSect << logBytesPerSector;
1972 bp->b_bufsize = bp->b_bcount;
1973 bp->b_error = 0;
1974 bp->b_dev = dev;
1975 bp->b_data = buf;
1976 bp->b_blkno = startSect;
1977 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1978 if (bp->b_bcount == 0) {
1979 panic("bp->b_bcount is zero in InitBP!!\n");
1980 }
1981 bp->b_proc = b_proc;
1982 bp->b_iodone = cbFunc;
1983 bp->b_vp = b_vp;
1984
1985 }
1986
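/*
 * Fabricate a default disklabel for the RAID device from the array
 * geometry in raidPtr.  The track/cylinder numbers are synthetic (hence
 * the "fictitious" pack name); only the sector size and total size are
 * meaningful.
 */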
1987 static void
1988 raidgetdefaultlabel(raidPtr, rs, lp)
1989 RF_Raid_t *raidPtr;
1990 struct raid_softc *rs;
1991 struct disklabel *lp;
1992 {
1993 db1_printf(("Building a default label...\n"));
1994 memset(lp, 0, sizeof(*lp));
1995
1996 /* fabricate a label... */
1997 lp->d_secperunit = raidPtr->totalSectors;
1998 lp->d_secsize = raidPtr->bytesPerSector;
1999 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2000 lp->d_ntracks = 4 * raidPtr->numCol;
2001 lp->d_ncylinders = raidPtr->totalSectors /
2002 (lp->d_nsectors * lp->d_ntracks);
2003 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2004
2005 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2006 lp->d_type = DTYPE_RAID;
2007 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2008 lp->d_rpm = 3600;
2009 lp->d_interleave = 1;
2010 lp->d_flags = 0;
2011
2012 lp->d_partitions[RAW_PART].p_offset = 0;
2013 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2014 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2015 lp->d_npartitions = RAW_PART + 1;
2016
2017 lp->d_magic = DISKMAGIC;
2018 lp->d_magic2 = DISKMAGIC;
2019 lp->d_checksum = dkcksum(lp);
2020
2021 }
2022 /*
2023 * Read the disklabel from the raid device. If one is not present, fake one
2024 * up.
2025 */
2026 static void
2027 raidgetdisklabel(dev)
2028 dev_t dev;
2029 {
2030 int unit = raidunit(dev);
2031 struct raid_softc *rs = &raid_softc[unit];
2032 char *errstring;
2033 struct disklabel *lp = rs->sc_dkdev.dk_label;
2034 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2035 RF_Raid_t *raidPtr;
2036
2037 db1_printf(("Getting the disklabel...\n"));
2038
2039 memset(clp, 0, sizeof(*clp));
2040
2041 raidPtr = raidPtrs[unit];
2042
2043 raidgetdefaultlabel(raidPtr, rs, lp);
2044
2045 /*
2046 * Call the generic disklabel extraction routine.
2047 */
2048 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2049 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2050 if (errstring)
2051 raidmakedisklabel(rs);
2052 else {
2053 int i;
2054 struct partition *pp;
2055
2056 /*
2057 * Sanity check whether the found disklabel is valid.
2058 *
2059 * This is necessary since the total size of the raid device
2060 * may vary when the interleave is changed even though exactly
2061 * the same components are used, and an old disklabel may be used
2062 * if one is found.
2063 */
2064 if (lp->d_secperunit != rs->sc_size)
2065 printf("WARNING: %s: "
2066 "total sector size in disklabel (%d) != "
2067 "the size of raid (%ld)\n", rs->sc_xname,
2068 lp->d_secperunit, (long) rs->sc_size);
2069 for (i = 0; i < lp->d_npartitions; i++) {
2070 pp = &lp->d_partitions[i];
2071 if (pp->p_offset + pp->p_size > rs->sc_size)
2072 printf("WARNING: %s: end of partition `%c' "
2073 "exceeds the size of raid (%ld)\n",
2074 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2075 }
2076 }
2077
2078 }
2079 /*
2080 * Take care of things one might want to take care of in the event
2081 * that a disklabel isn't present.
2082 */
2083 static void
2084 raidmakedisklabel(rs)
2085 struct raid_softc *rs;
2086 {
2087 struct disklabel *lp = rs->sc_dkdev.dk_label;
2088 db1_printf(("Making a label..\n"));
2089
2090 /*
2091 * For historical reasons, if there's no disklabel present
2092 * the raw partition must be marked FS_BSDFFS.
2093 */
2094
2095 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2096
2097 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2098
2099 lp->d_checksum = dkcksum(lp);
2100 }
2101 /*
2102 * Lookup the provided name in the filesystem. If the file exists,
2103 * is a valid block device, and isn't being used by anyone else,
2104 * set *vpp to the file's vnode.
2105 * You'll find the original of this in ccd.c
2106 */
2107 int
2108 raidlookup(path, p, vpp)
2109 char *path;
2110 struct proc *p;
2111 struct vnode **vpp; /* result */
2112 {
2113 struct nameidata nd;
2114 struct vnode *vp;
2115 struct vattr va;
2116 int error;
2117
2118 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2119 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2120 #ifdef DEBUG
2121 printf("RAIDframe: vn_open returned %d\n", error);
2122 #endif
2123 return (error);
2124 }
2125 vp = nd.ni_vp;
2126 if (vp->v_usecount > 1) {
2127 VOP_UNLOCK(vp, 0);
2128 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2129 return (EBUSY);
2130 }
2131 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2132 VOP_UNLOCK(vp, 0);
2133 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2134 return (error);
2135 }
2136 /* XXX: eventually we should handle VREG, too. */
2137 if (va.va_type != VBLK) {
2138 VOP_UNLOCK(vp, 0);
2139 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2140 return (ENOTBLK);
2141 }
2142 VOP_UNLOCK(vp, 0);
2143 *vpp = vp;
2144 return (0);
2145 }
2146 /*
2147 * Wait interruptibly for an exclusive lock.
2148 *
2149 * XXX
2150 * Several drivers do this; it should be abstracted and made MP-safe.
2151 * (Hmm... where have we seen this warning before :-> GO )
2152 */
2153 static int
2154 raidlock(rs)
2155 struct raid_softc *rs;
2156 {
2157 int error;
2158
2159 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2160 rs->sc_flags |= RAIDF_WANTED;
2161 if ((error =
2162 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2163 return (error);
2164 }
2165 rs->sc_flags |= RAIDF_LOCKED;
2166 return (0);
2167 }
2168 /*
2169 * Unlock and wake up any waiters.
2170 */
2171 static void
2172 raidunlock(rs)
2173 struct raid_softc *rs;
2174 {
2175
2176 rs->sc_flags &= ~RAIDF_LOCKED;
2177 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2178 rs->sc_flags &= ~RAIDF_WANTED;
2179 wakeup(rs);
2180 }
2181 }
2182
2183
2184 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2185 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2186
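/*
 * Helpers to flip the clean/dirty flag in an on-disk component label.
 * Both read the existing label, update mod_counter and the clean flag,
 * and write the label back, e.g. (as done in rf_markalldirty() below):
 *
 *	raidmarkdirty(raidPtr->Disks[r][c].dev,
 *	    raidPtr->raid_cinfo[r][c].ci_vp, raidPtr->mod_counter);
 */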
2187 int
2188 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2189 {
2190 RF_ComponentLabel_t clabel;
2191 raidread_component_label(dev, b_vp, &clabel);
2192 clabel.mod_counter = mod_counter;
2193 clabel.clean = RF_RAID_CLEAN;
2194 raidwrite_component_label(dev, b_vp, &clabel);
2195 return(0);
2196 }
2197
2198
2199 int
2200 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2201 {
2202 RF_ComponentLabel_t clabel;
2203 raidread_component_label(dev, b_vp, &clabel);
2204 clabel.mod_counter = mod_counter;
2205 clabel.clean = RF_RAID_DIRTY;
2206 raidwrite_component_label(dev, b_vp, &clabel);
2207 return(0);
2208 }
2209
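/*
 * Read/write a component label.  The label lives in a reserved region
 * RF_COMPONENT_INFO_OFFSET bytes into the component (i.e. at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE of the raw partition) and is
 * RF_COMPONENT_INFO_SIZE bytes long.
 */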
2210 /* ARGSUSED */
2211 int
2212 raidread_component_label(dev, b_vp, clabel)
2213 dev_t dev;
2214 struct vnode *b_vp;
2215 RF_ComponentLabel_t *clabel;
2216 {
2217 struct buf *bp;
2218 int error;
2219
2220 /* XXX should probably ensure that we don't try to do this if
2221 someone has changed rf_protected_sectors. */
2222
2223 if (b_vp == NULL) {
2224 /* For whatever reason, this component is not valid.
2225 Don't try to read a component label from it. */
2226 return(EINVAL);
2227 }
2228
2229 /* get a block of the appropriate size... */
2230 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2231 bp->b_dev = dev;
2232
2233 /* get our ducks in a row for the read */
2234 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2235 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2236 bp->b_flags |= B_READ;
2237 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2238
2239 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2240
2241 error = biowait(bp);
2242
2243 if (!error) {
2244 memcpy(clabel, bp->b_data,
2245 sizeof(RF_ComponentLabel_t));
2246 #if 0
2247 rf_print_component_label( clabel );
2248 #endif
2249 } else {
2250 #if 0
2251 printf("Failed to read RAID component label!\n");
2252 #endif
2253 }
2254
2255 brelse(bp);
2256 return(error);
2257 }
2258 /* ARGSUSED */
2259 int
2260 raidwrite_component_label(dev, b_vp, clabel)
2261 dev_t dev;
2262 struct vnode *b_vp;
2263 RF_ComponentLabel_t *clabel;
2264 {
2265 struct buf *bp;
2266 int error;
2267
2268 /* get a block of the appropriate size... */
2269 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2270 bp->b_dev = dev;
2271
2272 /* get our ducks in a row for the write */
2273 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2274 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2275 bp->b_flags |= B_WRITE;
2276 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2277
2278 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2279
2280 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2281
2282 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2283 error = biowait(bp);
2284 brelse(bp);
2285 if (error) {
2286 #if 1
2287 printf("Failed to write RAID component info!\n");
2288 #endif
2289 }
2290
2291 return(error);
2292 }
2293
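/*
 * Bump the array's mod_counter and mark the component label of every
 * live (non-failed) component dirty.  Spared components are skipped
 * for now (see the XXX below).
 */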
2294 void
2295 rf_markalldirty(raidPtr)
2296 RF_Raid_t *raidPtr;
2297 {
2298 RF_ComponentLabel_t clabel;
2299 int r,c;
2300
2301 raidPtr->mod_counter++;
2302 for (r = 0; r < raidPtr->numRow; r++) {
2303 for (c = 0; c < raidPtr->numCol; c++) {
2304 /* we don't want to touch (at all) a disk that has
2305 failed */
2306 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2307 raidread_component_label(
2308 raidPtr->Disks[r][c].dev,
2309 raidPtr->raid_cinfo[r][c].ci_vp,
2310 &clabel);
2311 if (clabel.status == rf_ds_spared) {
2312 /* XXX do something special...
2313 but whatever you do, don't
2314 try to access it!! */
2315 } else {
2316 #if 0
2317 clabel.status =
2318 raidPtr->Disks[r][c].status;
2319 raidwrite_component_label(
2320 raidPtr->Disks[r][c].dev,
2321 raidPtr->raid_cinfo[r][c].ci_vp,
2322 &clabel);
2323 #endif
2324 raidmarkdirty(
2325 raidPtr->Disks[r][c].dev,
2326 raidPtr->raid_cinfo[r][c].ci_vp,
2327 raidPtr->mod_counter);
2328 }
2329 }
2330 }
2331 }
2332 /* printf("Component labels marked dirty.\n"); */
2333 #if 0
2334 for( c = 0; c < raidPtr->numSpare ; c++) {
2335 sparecol = raidPtr->numCol + c;
2336 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2337 /*
2338
2339 XXX this is where we get fancy and map this spare
2340 into its correct spot in the array.
2341
2342 */
2343 /*
2344
2345 we claim this disk is "optimal" if it's
2346 rf_ds_used_spare, as that means it should be
2347 directly substitutable for the disk it replaced.
2348 We note that too...
2349
2350 */
2351
2352 for(i=0;i<raidPtr->numRow;i++) {
2353 for(j=0;j<raidPtr->numCol;j++) {
2354 if ((raidPtr->Disks[i][j].spareRow ==
2355 r) &&
2356 (raidPtr->Disks[i][j].spareCol ==
2357 sparecol)) {
2358 srow = r;
2359 scol = sparecol;
2360 break;
2361 }
2362 }
2363 }
2364
2365 raidread_component_label(
2366 raidPtr->Disks[r][sparecol].dev,
2367 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2368 &clabel);
2369 /* make sure status is noted */
2370 clabel.version = RF_COMPONENT_LABEL_VERSION;
2371 clabel.mod_counter = raidPtr->mod_counter;
2372 clabel.serial_number = raidPtr->serial_number;
2373 clabel.row = srow;
2374 clabel.column = scol;
2375 clabel.num_rows = raidPtr->numRow;
2376 clabel.num_columns = raidPtr->numCol;
2377 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2378 clabel.status = rf_ds_optimal;
2379 raidwrite_component_label(
2380 raidPtr->Disks[r][sparecol].dev,
2381 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2382 &clabel);
2383 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2384 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2385 }
2386 }
2387
2388 #endif
2389 }
2390
2391
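/*
 * Refresh the component labels on all optimal components (and any
 * in-use spares): bump mod_counter, note rf_ds_optimal status, and,
 * on a final update with good parity, set the clean bit as well.
 */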
2392 void
2393 rf_update_component_labels(raidPtr, final)
2394 RF_Raid_t *raidPtr;
2395 int final;
2396 {
2397 RF_ComponentLabel_t clabel;
2398 int sparecol;
2399 int r,c;
2400 int i,j;
2401 int srow, scol;
2402
2403 srow = -1;
2404 scol = -1;
2405
2406 /* XXX should do extra checks to make sure things really are clean,
2407 rather than blindly setting the clean bit... */
2408
2409 raidPtr->mod_counter++;
2410
2411 for (r = 0; r < raidPtr->numRow; r++) {
2412 for (c = 0; c < raidPtr->numCol; c++) {
2413 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2414 raidread_component_label(
2415 raidPtr->Disks[r][c].dev,
2416 raidPtr->raid_cinfo[r][c].ci_vp,
2417 &clabel);
2418 /* make sure status is noted */
2419 clabel.status = rf_ds_optimal;
2420 /* bump the counter */
2421 clabel.mod_counter = raidPtr->mod_counter;
2422
2423 raidwrite_component_label(
2424 raidPtr->Disks[r][c].dev,
2425 raidPtr->raid_cinfo[r][c].ci_vp,
2426 &clabel);
2427 if (final == RF_FINAL_COMPONENT_UPDATE) {
2428 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2429 raidmarkclean(
2430 raidPtr->Disks[r][c].dev,
2431 raidPtr->raid_cinfo[r][c].ci_vp,
2432 raidPtr->mod_counter);
2433 }
2434 }
2435 }
2436 /* else we don't touch it.. */
2437 }
2438 }
2439
2440 for( c = 0; c < raidPtr->numSpare ; c++) {
2441 sparecol = raidPtr->numCol + c;
2442 /* Need to ensure that the reconstruct actually completed! */
2443 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2444 /*
2445
2446 we claim this disk is "optimal" if it's
2447 rf_ds_used_spare, as that means it should be
2448 directly substitutable for the disk it replaced.
2449 We note that too...
2450
2451 */
2452
2453 for(i=0;i<raidPtr->numRow;i++) {
2454 for(j=0;j<raidPtr->numCol;j++) {
2455 if ((raidPtr->Disks[i][j].spareRow ==
2456 0) &&
2457 (raidPtr->Disks[i][j].spareCol ==
2458 sparecol)) {
2459 srow = i;
2460 scol = j;
2461 break;
2462 }
2463 }
2464 }
2465
2466 /* XXX shouldn't *really* need this... */
2467 raidread_component_label(
2468 raidPtr->Disks[0][sparecol].dev,
2469 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2470 &clabel);
2471 /* make sure status is noted */
2472
2473 raid_init_component_label(raidPtr, &clabel);
2474
2475 clabel.mod_counter = raidPtr->mod_counter;
2476 clabel.row = srow;
2477 clabel.column = scol;
2478 clabel.status = rf_ds_optimal;
2479
2480 raidwrite_component_label(
2481 raidPtr->Disks[0][sparecol].dev,
2482 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2483 &clabel);
2484 if (final == RF_FINAL_COMPONENT_UPDATE) {
2485 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2486 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2487 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2488 raidPtr->mod_counter);
2489 }
2490 }
2491 }
2492 }
2493 /* printf("Component labels updated\n"); */
2494 }
2495
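/*
 * Close the vnode for a single component.  Components opened at
 * auto-configuration time get VOP_CLOSE()+vput(); everything else is
 * closed with vn_close().
 */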
2496 void
2497 rf_close_component(raidPtr, vp, auto_configured)
2498 RF_Raid_t *raidPtr;
2499 struct vnode *vp;
2500 int auto_configured;
2501 {
2502 struct proc *p;
2503
2504 p = raidPtr->engine_thread;
2505
2506 if (vp != NULL) {
2507 if (auto_configured == 1) {
2508 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2509 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2510 vput(vp);
2511
2512 } else {
2513 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2514 }
2515 } else {
2516 printf("vnode was NULL\n");
2517 }
2518 }
2519
2520
2521 void
2522 rf_UnconfigureVnodes(raidPtr)
2523 RF_Raid_t *raidPtr;
2524 {
2525 int r,c;
2526 struct proc *p;
2527 struct vnode *vp;
2528 int acd;
2529
2530
2531 /* We take this opportunity to close the vnodes like we should.. */
2532
2533 p = raidPtr->engine_thread;
2534
2535 for (r = 0; r < raidPtr->numRow; r++) {
2536 for (c = 0; c < raidPtr->numCol; c++) {
2537 printf("Closing vnode for row: %d col: %d\n", r, c);
2538 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2539 acd = raidPtr->Disks[r][c].auto_configured;
2540 rf_close_component(raidPtr, vp, acd);
2541 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2542 raidPtr->Disks[r][c].auto_configured = 0;
2543 }
2544 }
2545 for (r = 0; r < raidPtr->numSpare; r++) {
2546 printf("Closing vnode for spare: %d\n", r);
2547 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2548 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2549 rf_close_component(raidPtr, vp, acd);
2550 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2551 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2552 }
2553 }
2554
2555
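/*
 * Kernel thread bodies for the long-running RAIDframe operations
 * (reconstruction, parity rewrite, copyback).  Each one sets the
 * corresponding *_in_progress flag, does the work at splbio(), clears
 * the flag, and finishes with kthread_exit() -- none of them return.
 */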
2556 void
2557 rf_ReconThread(req)
2558 struct rf_recon_req *req;
2559 {
2560 int s;
2561 RF_Raid_t *raidPtr;
2562
2563 s = splbio();
2564 raidPtr = (RF_Raid_t *) req->raidPtr;
2565 raidPtr->recon_in_progress = 1;
2566
2567 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2568 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2569
2570 /* XXX get rid of this! we don't need it at all.. */
2571 RF_Free(req, sizeof(*req));
2572
2573 raidPtr->recon_in_progress = 0;
2574 splx(s);
2575
2576 /* That's all... */
2577 kthread_exit(0); /* does not return */
2578 }
2579
2580 void
2581 rf_RewriteParityThread(raidPtr)
2582 RF_Raid_t *raidPtr;
2583 {
2584 int retcode;
2585 int s;
2586
2587 raidPtr->parity_rewrite_in_progress = 1;
2588 s = splbio();
2589 retcode = rf_RewriteParity(raidPtr);
2590 splx(s);
2591 if (retcode) {
2592 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2593 } else {
2594 /* set the clean bit! If we shutdown correctly,
2595 the clean bit on each component label will get
2596 set */
2597 raidPtr->parity_good = RF_RAID_CLEAN;
2598 }
2599 raidPtr->parity_rewrite_in_progress = 0;
2600
2601 /* Anyone waiting for us to stop? If so, inform them... */
2602 if (raidPtr->waitShutdown) {
2603 wakeup(&raidPtr->parity_rewrite_in_progress);
2604 }
2605
2606 /* That's all... */
2607 kthread_exit(0); /* does not return */
2608 }
2609
2610
2611 void
2612 rf_CopybackThread(raidPtr)
2613 RF_Raid_t *raidPtr;
2614 {
2615 int s;
2616
2617 raidPtr->copyback_in_progress = 1;
2618 s = splbio();
2619 rf_CopybackReconstructedData(raidPtr);
2620 splx(s);
2621 raidPtr->copyback_in_progress = 0;
2622
2623 /* That's all... */
2624 kthread_exit(0); /* does not return */
2625 }
2626
2627
2628 void
2629 rf_ReconstructInPlaceThread(req)
2630 struct rf_recon_req *req;
2631 {
2632 int retcode;
2633 int s;
2634 RF_Raid_t *raidPtr;
2635
2636 s = splbio();
2637 raidPtr = req->raidPtr;
2638 raidPtr->recon_in_progress = 1;
2639 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2640 RF_Free(req, sizeof(*req));
2641 raidPtr->recon_in_progress = 0;
2642 splx(s);
2643
2644 /* That's all... */
2645 kthread_exit(0); /* does not return */
2646 }
2647
2648 void
2649 rf_mountroot_hook(dev)
2650 struct device *dev;
2651 {
2652
2653 }
2654
2655
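/*
 * Scan every disk-class device in the system for partitions of type
 * FS_RAID, read the component label from each, and return a list of
 * RF_AutoConfig_t entries for the ones that look reasonable.  The
 * vnodes of accepted components are left open for later use.
 */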
2656 RF_AutoConfig_t *
2657 rf_find_raid_components()
2658 {
2659 struct devnametobdevmaj *dtobdm;
2660 struct vnode *vp;
2661 struct disklabel label;
2662 struct device *dv;
2663 char *cd_name;
2664 dev_t dev;
2665 int error;
2666 int i;
2667 int good_one;
2668 RF_ComponentLabel_t *clabel;
2669 RF_AutoConfig_t *ac_list;
2670 RF_AutoConfig_t *ac;
2671
2672
2673 /* initialize the AutoConfig list */
2674 ac_list = NULL;
2675
2676 /* we begin by trolling through *all* the devices on the system */
2677
2678 for (dv = alldevs.tqh_first; dv != NULL;
2679 dv = dv->dv_list.tqe_next) {
2680
2681 /* we are only interested in disks... */
2682 if (dv->dv_class != DV_DISK)
2683 continue;
2684
2685 /* we don't care about floppies... */
2686 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2687 continue;
2688 }
2689 /* hdfd is the Atari/Hades floppy driver */
2690 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2691 continue;
2692 }
2693
2694 /* need to find the device_name_to_block_device_major stuff */
2695 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2696 dtobdm = dev_name2blk;
2697 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2698 dtobdm++;
2699 }
2700
2701 /* get a vnode for the raw partition of this disk */
2702
2703 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2704 if (bdevvp(dev, &vp))
2705 panic("RAID can't alloc vnode");
2706
2707 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2708
2709 if (error) {
2710 /* "Who cares." Continue looking
2711 for something that exists*/
2712 vput(vp);
2713 continue;
2714 }
2715
2716 /* Ok, the disk exists. Go get the disklabel. */
2717 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2718 FREAD, NOCRED, 0);
2719 if (error) {
2720 /*
2721 * XXX can't happen - open() would
2722 * have errored out (or faked up one)
2723 */
2724 printf("can't get label for dev %s%c (%d)!?!?\n",
2725 dv->dv_xname, 'a' + RAW_PART, error);
2726 }
2727
2728 /* don't need this any more. We'll allocate it again
2729 a little later if we really do... */
2730 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2731 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2732 vput(vp);
2733
2734 for (i=0; i < label.d_npartitions; i++) {
2735 /* We only support partitions marked as RAID */
2736 if (label.d_partitions[i].p_fstype != FS_RAID)
2737 continue;
2738
2739 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2740 if (bdevvp(dev, &vp))
2741 panic("RAID can't alloc vnode");
2742
2743 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2744 if (error) {
2745 /* Whatever... */
2746 vput(vp);
2747 continue;
2748 }
2749
2750 good_one = 0;
2751
2752 clabel = (RF_ComponentLabel_t *)
2753 malloc(sizeof(RF_ComponentLabel_t),
2754 M_RAIDFRAME, M_NOWAIT);
2755 if (clabel == NULL) {
2756 /* XXX CLEANUP HERE */
2757 printf("RAID auto config: out of memory!\n");
2758 return(NULL); /* XXX probably should panic? */
2759 }
2760
2761 if (!raidread_component_label(dev, vp, clabel)) {
2762 /* Got the label. Does it look reasonable? */
2763 if (rf_reasonable_label(clabel) &&
2764 (clabel->partitionSize <=
2765 label.d_partitions[i].p_size)) {
2766 #if DEBUG
2767 printf("Component on: %s%c: %d\n",
2768 dv->dv_xname, 'a'+i,
2769 label.d_partitions[i].p_size);
2770 rf_print_component_label(clabel);
2771 #endif
2772 /* if it's reasonable, add it,
2773 else ignore it. */
2774 ac = (RF_AutoConfig_t *)
2775 malloc(sizeof(RF_AutoConfig_t),
2776 M_RAIDFRAME,
2777 M_NOWAIT);
2778 if (ac == NULL) {
2779 /* XXX should panic?? */
2780 return(NULL);
2781 }
2782
2783 sprintf(ac->devname, "%s%c",
2784 dv->dv_xname, 'a'+i);
2785 ac->dev = dev;
2786 ac->vp = vp;
2787 ac->clabel = clabel;
2788 ac->next = ac_list;
2789 ac_list = ac;
2790 good_one = 1;
2791 }
2792 }
2793 if (!good_one) {
2794 /* cleanup */
2795 free(clabel, M_RAIDFRAME);
2796 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2797 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2798 vput(vp);
2799 }
2800 }
2801 }
2802 return(ac_list);
2803 }
2804
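/*
 * Sanity-check a component label: known version, sensible clean flag,
 * and row/column/geometry values that are self-consistent.  This is
 * only a plausibility test; grouping labels into sets is done later in
 * rf_create_auto_sets()/rf_does_it_fit().
 */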
2805 static int
2806 rf_reasonable_label(clabel)
2807 RF_ComponentLabel_t *clabel;
2808 {
2809
2810 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2811 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2812 ((clabel->clean == RF_RAID_CLEAN) ||
2813 (clabel->clean == RF_RAID_DIRTY)) &&
2814 clabel->row >=0 &&
2815 clabel->column >= 0 &&
2816 clabel->num_rows > 0 &&
2817 clabel->num_columns > 0 &&
2818 clabel->row < clabel->num_rows &&
2819 clabel->column < clabel->num_columns &&
2820 clabel->blockSize > 0 &&
2821 clabel->numBlocks > 0) {
2822 /* label looks reasonable enough... */
2823 return(1);
2824 }
2825 return(0);
2826 }
2827
2828
2829 void
2830 rf_print_component_label(clabel)
2831 RF_ComponentLabel_t *clabel;
2832 {
2833 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2834 clabel->row, clabel->column,
2835 clabel->num_rows, clabel->num_columns);
2836 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2837 clabel->version, clabel->serial_number,
2838 clabel->mod_counter);
2839 printf(" Clean: %s Status: %d\n",
2840 clabel->clean ? "Yes" : "No", clabel->status );
2841 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2842 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2843 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2844 (char) clabel->parityConfig, clabel->blockSize,
2845 clabel->numBlocks);
2846 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2847 printf(" Contains root partition: %s\n",
2848 clabel->root_partition ? "Yes" : "No" );
2849 printf(" Last configured as: raid%d\n", clabel->last_unit );
2850 #if 0
2851 printf(" Config order: %d\n", clabel->config_order);
2852 #endif
2853
2854 }
2855
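/*
 * Partition the flat list of discovered components into configuration
 * sets: each component is appended to the first existing set whose
 * first member it matches (rf_does_it_fit()), or starts a new set.
 */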
2856 RF_ConfigSet_t *
2857 rf_create_auto_sets(ac_list)
2858 RF_AutoConfig_t *ac_list;
2859 {
2860 RF_AutoConfig_t *ac;
2861 RF_ConfigSet_t *config_sets;
2862 RF_ConfigSet_t *cset;
2863 RF_AutoConfig_t *ac_next;
2864
2865
2866 config_sets = NULL;
2867
2868 /* Go through the AutoConfig list, and figure out which components
2869 belong to what sets. */
2870 ac = ac_list;
2871 while(ac!=NULL) {
2872 /* we're going to putz with ac->next, so save it here
2873 for use at the end of the loop */
2874 ac_next = ac->next;
2875
2876 if (config_sets == NULL) {
2877 /* will need at least this one... */
2878 config_sets = (RF_ConfigSet_t *)
2879 malloc(sizeof(RF_ConfigSet_t),
2880 M_RAIDFRAME, M_NOWAIT);
2881 if (config_sets == NULL) {
2882 panic("rf_create_auto_sets: No memory!\n");
2883 }
2884 /* this one is easy :) */
2885 config_sets->ac = ac;
2886 config_sets->next = NULL;
2887 config_sets->rootable = 0;
2888 ac->next = NULL;
2889 } else {
2890 /* which set does this component fit into? */
2891 cset = config_sets;
2892 while(cset!=NULL) {
2893 if (rf_does_it_fit(cset, ac)) {
2894 /* looks like it matches... */
2895 ac->next = cset->ac;
2896 cset->ac = ac;
2897 break;
2898 }
2899 cset = cset->next;
2900 }
2901 if (cset==NULL) {
2902 /* didn't find a match above... new set..*/
2903 cset = (RF_ConfigSet_t *)
2904 malloc(sizeof(RF_ConfigSet_t),
2905 M_RAIDFRAME, M_NOWAIT);
2906 if (cset == NULL) {
2907 panic("rf_create_auto_sets: No memory!\n");
2908 }
2909 cset->ac = ac;
2910 ac->next = NULL;
2911 cset->next = config_sets;
2912 cset->rootable = 0;
2913 config_sets = cset;
2914 }
2915 }
2916 ac = ac_next;
2917 }
2918
2919
2920 return(config_sets);
2921 }
2922
2923 static int
2924 rf_does_it_fit(cset, ac)
2925 RF_ConfigSet_t *cset;
2926 RF_AutoConfig_t *ac;
2927 {
2928 RF_ComponentLabel_t *clabel1, *clabel2;
2929
2930 /* If this one matches the *first* one in the set, that's good
2931 enough, since the other members of the set would have been
2932 through here too... */
2933 /* note that we are not checking partitionSize here..
2934
2935 Note that we are also not checking the mod_counters here.
2936 If everything else matches except the mod_counter, that's
2937 good enough for this test. We will deal with the mod_counters
2938 a little later in the autoconfiguration process.
2939
2940 (clabel1->mod_counter == clabel2->mod_counter) &&
2941
2942 The reason we don't check for this is that failed disks
2943 will have lower modification counts. If those disks are
2944 not added to the set they used to belong to, then they will
2945 form their own set, which may result in 2 different sets,
2946 for example, competing to be configured at raid0, and
2947 perhaps competing to be the root filesystem set. If the
2948 wrong ones get configured, or both attempt to become /,
2949 weird behaviour and/or serious lossage will occur. Thus we
2950 need to bring them into the fold here, and kick them out at
2951 a later point.
2952
2953 */
2954
2955 clabel1 = cset->ac->clabel;
2956 clabel2 = ac->clabel;
2957 if ((clabel1->version == clabel2->version) &&
2958 (clabel1->serial_number == clabel2->serial_number) &&
2959 (clabel1->num_rows == clabel2->num_rows) &&
2960 (clabel1->num_columns == clabel2->num_columns) &&
2961 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2962 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2963 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2964 (clabel1->parityConfig == clabel2->parityConfig) &&
2965 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2966 (clabel1->blockSize == clabel2->blockSize) &&
2967 (clabel1->numBlocks == clabel2->numBlocks) &&
2968 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2969 (clabel1->root_partition == clabel2->root_partition) &&
2970 (clabel1->last_unit == clabel2->last_unit) &&
2971 (clabel1->config_order == clabel2->config_order)) {
2972 /* if it gets here, it almost *has* to be a match */
2973 } else {
2974 /* it's not consistent with somebody in the set..
2975 punt */
2976 return(0);
2977 }
2978 /* all was fine.. it must fit... */
2979 return(1);
2980 }
2981
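/*
 * Decide whether a configuration set has enough live components (at
 * the highest mod_counter seen) to be worth configuring.  RAID 1 gets
 * special treatment: losing both halves of an even/odd mirror pair is
 * fatal; otherwise missing components are simply counted and checked
 * against what the parity level can tolerate.
 */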
2982 int
2983 rf_have_enough_components(cset)
2984 RF_ConfigSet_t *cset;
2985 {
2986 RF_AutoConfig_t *ac;
2987 RF_AutoConfig_t *auto_config;
2988 RF_ComponentLabel_t *clabel;
2989 int r,c;
2990 int num_rows;
2991 int num_cols;
2992 int num_missing;
2993 int mod_counter;
2994 int mod_counter_found;
2995 int even_pair_failed;
2996 char parity_type;
2997
2998
2999 /* check to see that we have enough 'live' components
3000 of this set. If so, we can configure it if necessary */
3001
3002 num_rows = cset->ac->clabel->num_rows;
3003 num_cols = cset->ac->clabel->num_columns;
3004 parity_type = cset->ac->clabel->parityConfig;
3005
3006 /* XXX Check for duplicate components!?!?!? */
3007
3008 /* Determine what the mod_counter is supposed to be for this set. */
3009
3010 mod_counter_found = 0;
3011 mod_counter = 0;
3012 ac = cset->ac;
3013 while(ac!=NULL) {
3014 if (mod_counter_found==0) {
3015 mod_counter = ac->clabel->mod_counter;
3016 mod_counter_found = 1;
3017 } else {
3018 if (ac->clabel->mod_counter > mod_counter) {
3019 mod_counter = ac->clabel->mod_counter;
3020 }
3021 }
3022 ac = ac->next;
3023 }
3024
3025 num_missing = 0;
3026 auto_config = cset->ac;
3027
3028 for(r=0; r<num_rows; r++) {
3029 even_pair_failed = 0;
3030 for(c=0; c<num_cols; c++) {
3031 ac = auto_config;
3032 while(ac!=NULL) {
3033 if ((ac->clabel->row == r) &&
3034 (ac->clabel->column == c) &&
3035 (ac->clabel->mod_counter == mod_counter)) {
3036 /* it's this one... */
3037 #if DEBUG
3038 printf("Found: %s at %d,%d\n",
3039 ac->devname,r,c);
3040 #endif
3041 break;
3042 }
3043 ac=ac->next;
3044 }
3045 if (ac==NULL) {
3046 /* Didn't find one here! */
3047 /* special case for RAID 1, especially
3048 where there are more than 2
3049 components (where RAIDframe treats
3050 things a little differently :( ) */
3051 if (parity_type == '1') {
3052 if (c%2 == 0) { /* even component */
3053 even_pair_failed = 1;
3054 } else { /* odd component. If
3055 we're failed, and
3056 so is the even
3057 component, it's
3058 "Good Night, Charlie" */
3059 if (even_pair_failed == 1) {
3060 return(0);
3061 }
3062 }
3063 } else {
3064 /* normal accounting */
3065 num_missing++;
3066 }
3067 }
3068 if ((parity_type == '1') && (c%2 == 1)) {
3069 /* Just did an even component, and we didn't
3070 bail.. reset the even_pair_failed flag,
3071 and go on to the next component.... */
3072 even_pair_failed = 0;
3073 }
3074 }
3075 }
3076
3077 clabel = cset->ac->clabel;
3078
3079 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3080 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3081 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3082 /* XXX this needs to be made *much* more general */
3083 /* Too many failures */
3084 return(0);
3085 }
3086 /* otherwise, all is well, and we've got enough to take a kick
3087 at autoconfiguring this set */
3088 return(1);
3089 }
3090
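/*
 * Build an RF_Config_t from the component labels in an auto-config
 * list: geometry and layout parameters come from the first label, and
 * the device names are filled in from each component's row/column.
 */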
3091 void
3092 rf_create_configuration(ac,config,raidPtr)
3093 RF_AutoConfig_t *ac;
3094 RF_Config_t *config;
3095 RF_Raid_t *raidPtr;
3096 {
3097 RF_ComponentLabel_t *clabel;
3098 int i;
3099
3100 clabel = ac->clabel;
3101
3102 /* 1. Fill in the common stuff */
3103 config->numRow = clabel->num_rows;
3104 config->numCol = clabel->num_columns;
3105 config->numSpare = 0; /* XXX should this be set here? */
3106 config->sectPerSU = clabel->sectPerSU;
3107 config->SUsPerPU = clabel->SUsPerPU;
3108 config->SUsPerRU = clabel->SUsPerRU;
3109 config->parityConfig = clabel->parityConfig;
3110 /* XXX... */
3111 strcpy(config->diskQueueType,"fifo");
3112 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3113 config->layoutSpecificSize = 0; /* XXX ?? */
3114
3115 while(ac!=NULL) {
3116 /* row/col values will be in range due to the checks
3117 in reasonable_label() */
3118 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3119 ac->devname);
3120 ac = ac->next;
3121 }
3122
3123 for(i=0;i<RF_MAXDBGV;i++) {
3124 config->debugVars[i][0] = '\0';
3125 }
3126 }
3127
3128 int
3129 rf_set_autoconfig(raidPtr, new_value)
3130 RF_Raid_t *raidPtr;
3131 int new_value;
3132 {
3133 RF_ComponentLabel_t clabel;
3134 struct vnode *vp;
3135 dev_t dev;
3136 int row, column;
3137
3138 raidPtr->autoconfigure = new_value;
3139 for(row=0; row<raidPtr->numRow; row++) {
3140 for(column=0; column<raidPtr->numCol; column++) {
3141 if (raidPtr->Disks[row][column].status ==
3142 rf_ds_optimal) {
3143 dev = raidPtr->Disks[row][column].dev;
3144 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3145 raidread_component_label(dev, vp, &clabel);
3146 clabel.autoconfigure = new_value;
3147 raidwrite_component_label(dev, vp, &clabel);
3148 }
3149 }
3150 }
3151 return(new_value);
3152 }
3153
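/*
 * rf_set_autoconfig() and rf_set_rootpartition() push a new value for
 * the corresponding flag into the in-core state and into the component
 * label of every optimal component; e.g. rf_set_autoconfig(raidPtr, 1)
 * turns auto-configuration on for the whole set.
 */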
3154 int
3155 rf_set_rootpartition(raidPtr, new_value)
3156 RF_Raid_t *raidPtr;
3157 int new_value;
3158 {
3159 RF_ComponentLabel_t clabel;
3160 struct vnode *vp;
3161 dev_t dev;
3162 int row, column;
3163
3164 raidPtr->root_partition = new_value;
3165 for(row=0; row<raidPtr->numRow; row++) {
3166 for(column=0; column<raidPtr->numCol; column++) {
3167 if (raidPtr->Disks[row][column].status ==
3168 rf_ds_optimal) {
3169 dev = raidPtr->Disks[row][column].dev;
3170 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3171 raidread_component_label(dev, vp, &clabel);
3172 clabel.root_partition = new_value;
3173 raidwrite_component_label(dev, vp, &clabel);
3174 }
3175 }
3176 }
3177 return(new_value);
3178 }
3179
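/*
 * rf_release_all_vps() closes and releases the vnodes held by an
 * auto-config list; rf_cleanup_config_set() then frees the labels,
 * the RF_AutoConfig_t entries, and the set itself.
 */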
3180 void
3181 rf_release_all_vps(cset)
3182 RF_ConfigSet_t *cset;
3183 {
3184 RF_AutoConfig_t *ac;
3185
3186 ac = cset->ac;
3187 while(ac!=NULL) {
3188 /* Close the vp, and give it back */
3189 if (ac->vp) {
3190 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3191 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3192 vput(ac->vp);
3193 ac->vp = NULL;
3194 }
3195 ac = ac->next;
3196 }
3197 }
3198
3199
3200 void
3201 rf_cleanup_config_set(cset)
3202 RF_ConfigSet_t *cset;
3203 {
3204 RF_AutoConfig_t *ac;
3205 RF_AutoConfig_t *next_ac;
3206
3207 ac = cset->ac;
3208 while(ac!=NULL) {
3209 next_ac = ac->next;
3210 /* nuke the label */
3211 free(ac->clabel, M_RAIDFRAME);
3212 /* cleanup the config structure */
3213 free(ac, M_RAIDFRAME);
3214 /* "next.." */
3215 ac = next_ac;
3216 }
3217 /* and, finally, nuke the config set */
3218 free(cset, M_RAIDFRAME);
3219 }
3220
3221
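/*
 * Fill in the array-wide fields of a component label from the current
 * in-core state.  Per-component fields (row, column, status) are set
 * by the callers.
 */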
3222 void
3223 raid_init_component_label(raidPtr, clabel)
3224 RF_Raid_t *raidPtr;
3225 RF_ComponentLabel_t *clabel;
3226 {
3227 /* current version number */
3228 clabel->version = RF_COMPONENT_LABEL_VERSION;
3229 clabel->serial_number = raidPtr->serial_number;
3230 clabel->mod_counter = raidPtr->mod_counter;
3231 clabel->num_rows = raidPtr->numRow;
3232 clabel->num_columns = raidPtr->numCol;
3233 clabel->clean = RF_RAID_DIRTY; /* not clean */
3234 clabel->status = rf_ds_optimal; /* "It's good!" */
3235
3236 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3237 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3238 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3239
3240 clabel->blockSize = raidPtr->bytesPerSector;
3241 clabel->numBlocks = raidPtr->sectorsPerDisk;
3242
3243 /* XXX not portable */
3244 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3245 clabel->maxOutstanding = raidPtr->maxOutstanding;
3246 clabel->autoconfigure = raidPtr->autoconfigure;
3247 clabel->root_partition = raidPtr->root_partition;
3248 clabel->last_unit = raidPtr->raidid;
3249 clabel->config_order = raidPtr->config_order;
3250 }
3251
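/*
 * Configure one auto-detected set.  Try to reuse the unit number the
 * set was last configured at (clabel->last_unit); if that is taken,
 * fall back to the highest free unit.  On success *unit is set to the
 * raid device number and the set is marked eligible for root if its
 * labels say so.
 */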
3252 int
3253 rf_auto_config_set(cset,unit)
3254 RF_ConfigSet_t *cset;
3255 int *unit;
3256 {
3257 RF_Raid_t *raidPtr;
3258 RF_Config_t *config;
3259 int raidID;
3260 int retcode;
3261
3262 printf("RAID autoconfigure\n");
3263
3264 retcode = 0;
3265 *unit = -1;
3266
3267 /* 1. Create a config structure */
3268
3269 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3270 M_RAIDFRAME,
3271 M_NOWAIT);
3272 if (config==NULL) {
3273 printf("Out of mem!?!?\n");
3274 /* XXX do something more intelligent here. */
3275 return(1);
3276 }
3277
3278 memset(config, 0, sizeof(RF_Config_t));
3279
3280 /* XXX raidID needs to be set correctly.. */
3281
3282 /*
3283 2. Figure out what RAID ID this one is supposed to live at
3284 See if we can get the same RAID dev that it was configured
3285 on last time..
3286 */
3287
3288 raidID = cset->ac->clabel->last_unit;
3289 if ((raidID < 0) || (raidID >= numraid)) {
3290 /* let's not wander off into lala land. */
3291 raidID = numraid - 1;
3292 }
3293 if (raidPtrs[raidID]->valid != 0) {
3294
3295 /*
3296 Nope... Go looking for an alternative...
3297 Start high so we don't immediately use raid0 if that's
3298 not taken.
3299 */
3300
3301 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3302 if (raidPtrs[raidID]->valid == 0) {
3303 /* can use this one! */
3304 break;
3305 }
3306 }
3307 }
3308
3309 if (raidID < 0) {
3310 /* punt... */
3311 printf("Unable to auto configure this set!\n");
3312 printf("(Out of RAID devs!)\n");
3313 return(1);
3314 }
3315 printf("Configuring raid%d:\n",raidID);
3316 raidPtr = raidPtrs[raidID];
3317
3318 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3319 raidPtr->raidid = raidID;
3320 raidPtr->openings = RAIDOUTSTANDING;
3321
3322 /* 3. Build the configuration structure */
3323 rf_create_configuration(cset->ac, config, raidPtr);
3324
3325 /* 4. Do the configuration */
3326 retcode = rf_Configure(raidPtr, config, cset->ac);
3327
3328 if (retcode == 0) {
3329
3330 raidinit(raidPtrs[raidID]);
3331
3332 rf_markalldirty(raidPtrs[raidID]);
3333 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3334 if (cset->ac->clabel->root_partition==1) {
3335 /* everything configured just fine. Make a note
3336 that this set is eligible to be root. */
3337 cset->rootable = 1;
3338 /* XXX do this here? */
3339 raidPtrs[raidID]->root_partition = 1;
3340 }
3341 }
3342
3343 /* 5. Cleanup */
3344 free(config, M_RAIDFRAME);
3345
3346 *unit = raidID;
3347 return(retcode);
3348 }
3349
3350 void
3351 rf_disk_unbusy(desc)
3352 RF_RaidAccessDesc_t *desc;
3353 {
3354 struct buf *bp;
3355
3356 bp = (struct buf *)desc->bp;
3357 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3358 (bp->b_bcount - bp->b_resid));
3359 }
3360