     1 /*	$NetBSD: rf_netbsdkintf.c,v 1.128 2002/08/04 03:27:04 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.128 2002/08/04 03:27:04 oster Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 int rf_kdebug_level = 0;
157
158 #ifdef DEBUG
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit(RF_Raid_t *);
181
182 void raidattach(int);
183 int raidsize(dev_t);
184 int raidopen(dev_t, int, int, struct proc *);
185 int raidclose(dev_t, int, int, struct proc *);
186 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
187 int raidwrite(dev_t, struct uio *, int);
188 int raidread(dev_t, struct uio *, int);
189 void raidstrategy(struct buf *);
190 int raiddump(dev_t, daddr_t, caddr_t, size_t);
191
192 /*
193 * Pilfered from ccd.c
194 */
195
196 struct raidbuf {
197 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
198 struct buf *rf_obp; /* ptr. to original I/O buf */
199 int rf_flags; /* misc. flags */
200 RF_DiskQueueData_t *req;/* the request that this was part of.. */
201 };
202
203 /* component buffer pool */
204 struct pool raidframe_cbufpool;
205
206 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
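/*
 * A note on the layout of struct raidbuf above: component I/O is issued
 * on &raidbp->rf_buf, so the completion callback only ever sees a
 * struct buf pointer.  Because rf_buf sits at offset 0, the enclosing
 * raidbuf can be recovered with a plain cast.  A minimal sketch of the
 * idiom (it is what KernelWakeupFunc() below actually does; the name
 * "callback" here is illustrative only):
 *
 *	void callback(struct buf *vbp)
 *	{
 *		struct raidbuf *raidbp = (struct raidbuf *) vbp;
 *		struct buf *obp = raidbp->rf_obp;	the original request
 *	}
 */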
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct bufq_state buf_queue; /* used for the device queue */
220 };
221 /* sc_flags */
222 #define RAIDF_INITED 0x01 /* unit has been initialized */
223 #define RAIDF_WLABEL 0x02 /* label area is writable */
224 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
225 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
226 #define RAIDF_LOCKED 0x80 /* unit is locked */
227
228 #define raidunit(x) DISKUNIT(x)
229 int numraid = 0;
230
231 /*
232 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
233 * Be aware that large numbers can allow the driver to consume a lot of
234 * kernel memory, especially on writes, and in degraded mode reads.
235 *
236 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
237 * a single 64K write will typically require 64K for the old data,
238 * 64K for the old parity, and 64K for the new parity, for a total
239 * of 192K (if the parity buffer is not re-used immediately).
   240  * Even if it is used immediately, that's still 128K, which when multiplied
241 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
242 *
243 * Now in degraded mode, for example, a 64K read on the above setup may
244 * require data reconstruction, which will require *all* of the 4 remaining
245 * disks to participate -- 4 * 32K/disk == 128K again.
246 */
247
248 #ifndef RAIDOUTSTANDING
249 #define RAIDOUTSTANDING 6
250 #endif
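/*
 * Worked out for the default of 6 outstanding requests, using the same
 * 5-disk, 32k-stripe-unit example as above (a rough estimate only; the
 * exact figures depend on the layout and on how quickly buffers are
 * recycled):
 *
 *	per-write buffering:	3 x 64K  =  192K  (old data, old parity, new parity)
 *	6 requests in flight:	6 x 192K = 1152K
 *	plus incoming data:	6 x 64K  =  384K
 *				-----------------
 *				roughly 1.5MB of kernel memory
 */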
251
252 #define RAIDLABELDEV(dev) \
253 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
254
255 /* declared here, and made public, for the benefit of KVM stuff.. */
256 struct raid_softc *raid_softc;
257
258 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
259 struct disklabel *);
260 static void raidgetdisklabel(dev_t);
261 static void raidmakedisklabel(struct raid_softc *);
262
263 static int raidlock(struct raid_softc *);
264 static void raidunlock(struct raid_softc *);
265
266 static void rf_markalldirty(RF_Raid_t *);
267 void rf_mountroot_hook(struct device *);
268
269 struct device *raidrootdev;
270
271 void rf_ReconThread(struct rf_recon_req *);
272 /* XXX what I want is: */
273 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
274 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
275 void rf_CopybackThread(RF_Raid_t *raidPtr);
276 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
277 void rf_buildroothack(void *);
278
279 RF_AutoConfig_t *rf_find_raid_components(void);
280 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
281 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
282 static int rf_reasonable_label(RF_ComponentLabel_t *);
283 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
284 int rf_set_autoconfig(RF_Raid_t *, int);
285 int rf_set_rootpartition(RF_Raid_t *, int);
286 void rf_release_all_vps(RF_ConfigSet_t *);
287 void rf_cleanup_config_set(RF_ConfigSet_t *);
288 int rf_have_enough_components(RF_ConfigSet_t *);
289 int rf_auto_config_set(RF_ConfigSet_t *, int *);
290
291 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
292 allow autoconfig to take place.
293 Note that this is overridden by having
294 RAID_AUTOCONFIG as an option in the
295 kernel config file. */
296
297 void
298 raidattach(num)
299 int num;
300 {
301 int raidID;
302 int i, rc;
303 RF_AutoConfig_t *ac_list; /* autoconfig list */
304 RF_ConfigSet_t *config_sets;
305
306 #ifdef DEBUG
307 printf("raidattach: Asked for %d units\n", num);
308 #endif
309
310 if (num <= 0) {
311 #ifdef DIAGNOSTIC
312 panic("raidattach: count <= 0");
313 #endif
314 return;
315 }
316 /* This is where all the initialization stuff gets done. */
317
318 numraid = num;
319
320 /* Make some space for requested number of units... */
321
322 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
323 if (raidPtrs == NULL) {
324 panic("raidPtrs is NULL!!\n");
325 }
326
327 /* Initialize the component buffer pool. */
328 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
329 0, 0, "raidpl", NULL);
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337
338 for (i = 0; i < num; i++)
339 raidPtrs[i] = NULL;
340 rc = rf_BootRaidframe();
341 if (rc == 0)
342 printf("Kernelized RAIDframe activated\n");
343 else
344 panic("Serious error booting RAID!!\n");
345
   346 	/* put together some data structures like the CCD device does.. This
347 * lets us lock the device and what-not when it gets opened. */
348
349 raid_softc = (struct raid_softc *)
350 malloc(num * sizeof(struct raid_softc),
351 M_RAIDFRAME, M_NOWAIT);
352 if (raid_softc == NULL) {
353 printf("WARNING: no memory for RAIDframe driver\n");
354 return;
355 }
356
357 memset(raid_softc, 0, num * sizeof(struct raid_softc));
358
359 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
360 M_RAIDFRAME, M_NOWAIT);
361 if (raidrootdev == NULL) {
362 panic("No memory for RAIDframe driver!!?!?!\n");
363 }
364
365 for (raidID = 0; raidID < num; raidID++) {
366 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
367
368 raidrootdev[raidID].dv_class = DV_DISK;
369 raidrootdev[raidID].dv_cfdata = NULL;
370 raidrootdev[raidID].dv_unit = raidID;
371 raidrootdev[raidID].dv_parent = NULL;
372 raidrootdev[raidID].dv_flags = 0;
373 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
374
375 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
376 (RF_Raid_t *));
377 if (raidPtrs[raidID] == NULL) {
378 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
379 numraid = raidID;
380 return;
381 }
382 }
383
384 #ifdef RAID_AUTOCONFIG
385 raidautoconfig = 1;
386 #endif
387
388 if (raidautoconfig) {
389 /* 1. locate all RAID components on the system */
390
391 #if DEBUG
392 printf("Searching for raid components...\n");
393 #endif
394 ac_list = rf_find_raid_components();
395
396 /* 2. sort them into their respective sets */
397
398 config_sets = rf_create_auto_sets(ac_list);
399
400 /* 3. evaluate each set and configure the valid ones
401 This gets done in rf_buildroothack() */
402
403 /* schedule the creation of the thread to do the
404 "/ on RAID" stuff */
405
406 kthread_create(rf_buildroothack,config_sets);
407
408 #if 0
409 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
410 #endif
411 }
412
413 }
414
415 void
416 rf_buildroothack(arg)
417 void *arg;
418 {
419 RF_ConfigSet_t *config_sets = arg;
420 RF_ConfigSet_t *cset;
421 RF_ConfigSet_t *next_cset;
422 int retcode;
423 int raidID;
424 int rootID;
425 int num_root;
426
427 rootID = 0;
428 num_root = 0;
429 cset = config_sets;
430 while(cset != NULL ) {
431 next_cset = cset->next;
432 if (rf_have_enough_components(cset) &&
433 cset->ac->clabel->autoconfigure==1) {
434 retcode = rf_auto_config_set(cset,&raidID);
435 if (!retcode) {
436 if (cset->rootable) {
437 rootID = raidID;
438 num_root++;
439 }
440 } else {
441 /* The autoconfig didn't work :( */
442 #if DEBUG
443 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
444 #endif
445 rf_release_all_vps(cset);
446 }
447 } else {
448 /* we're not autoconfiguring this set...
449 release the associated resources */
450 rf_release_all_vps(cset);
451 }
452 /* cleanup */
453 rf_cleanup_config_set(cset);
454 cset = next_cset;
455 }
456
457 /* we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466
467
468 int
469 raidsize(dev)
470 dev_t dev;
471 {
472 struct raid_softc *rs;
473 struct disklabel *lp;
474 int part, unit, omask, size;
475
476 unit = raidunit(dev);
477 if (unit >= numraid)
478 return (-1);
479 rs = &raid_softc[unit];
480
481 if ((rs->sc_flags & RAIDF_INITED) == 0)
482 return (-1);
483
484 part = DISKPART(dev);
485 omask = rs->sc_dkdev.dk_openmask & (1 << part);
486 lp = rs->sc_dkdev.dk_label;
487
488 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
489 return (-1);
490
491 if (lp->d_partitions[part].p_fstype != FS_SWAP)
492 size = -1;
493 else
494 size = lp->d_partitions[part].p_size *
495 (lp->d_secsize / DEV_BSIZE);
496
497 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
498 return (-1);
499
500 return (size);
501
502 }
503
504 int
505 raiddump(dev, blkno, va, size)
506 dev_t dev;
507 daddr_t blkno;
508 caddr_t va;
509 size_t size;
510 {
511 /* Not implemented. */
512 return ENXIO;
513 }
514 /* ARGSUSED */
515 int
516 raidopen(dev, flags, fmt, p)
517 dev_t dev;
518 int flags, fmt;
519 struct proc *p;
520 {
521 int unit = raidunit(dev);
522 struct raid_softc *rs;
523 struct disklabel *lp;
524 int part, pmask;
525 int error = 0;
526
527 if (unit >= numraid)
528 return (ENXIO);
529 rs = &raid_softc[unit];
530
531 if ((error = raidlock(rs)) != 0)
532 return (error);
533 lp = rs->sc_dkdev.dk_label;
534
535 part = DISKPART(dev);
536 pmask = (1 << part);
537
538 db1_printf(("Opening raid device number: %d partition: %d\n",
539 unit, part));
540
541
542 if ((rs->sc_flags & RAIDF_INITED) &&
543 (rs->sc_dkdev.dk_openmask == 0))
544 raidgetdisklabel(dev);
545
546 /* make sure that this partition exists */
547
548 if (part != RAW_PART) {
549 db1_printf(("Not a raw partition..\n"));
550 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
551 ((part >= lp->d_npartitions) ||
552 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
553 error = ENXIO;
554 raidunlock(rs);
555 db1_printf(("Bailing out...\n"));
556 return (error);
557 }
558 }
559 /* Prevent this unit from being unconfigured while open. */
560 switch (fmt) {
561 case S_IFCHR:
562 rs->sc_dkdev.dk_copenmask |= pmask;
563 break;
564
565 case S_IFBLK:
566 rs->sc_dkdev.dk_bopenmask |= pmask;
567 break;
568 }
569
570 if ((rs->sc_dkdev.dk_openmask == 0) &&
571 ((rs->sc_flags & RAIDF_INITED) != 0)) {
572 /* First one... mark things as dirty... Note that we *MUST*
573 have done a configure before this. I DO NOT WANT TO BE
574 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
575 THAT THEY BELONG TOGETHER!!!!! */
576 /* XXX should check to see if we're only open for reading
577 here... If so, we needn't do this, but then need some
578 other way of keeping track of what's happened.. */
579
580 rf_markalldirty( raidPtrs[unit] );
581 }
582
583
584 rs->sc_dkdev.dk_openmask =
585 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
586
587 raidunlock(rs);
588
589 return (error);
590
591
592 }
593 /* ARGSUSED */
594 int
595 raidclose(dev, flags, fmt, p)
596 dev_t dev;
597 int flags, fmt;
598 struct proc *p;
599 {
600 int unit = raidunit(dev);
601 struct raid_softc *rs;
602 int error = 0;
603 int part;
604
605 if (unit >= numraid)
606 return (ENXIO);
607 rs = &raid_softc[unit];
608
609 if ((error = raidlock(rs)) != 0)
610 return (error);
611
612 part = DISKPART(dev);
613
614 /* ...that much closer to allowing unconfiguration... */
615 switch (fmt) {
616 case S_IFCHR:
617 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
618 break;
619
620 case S_IFBLK:
621 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
622 break;
623 }
624 rs->sc_dkdev.dk_openmask =
625 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
626
627 if ((rs->sc_dkdev.dk_openmask == 0) &&
628 ((rs->sc_flags & RAIDF_INITED) != 0)) {
   629 		/* Last one... device is not unconfigured yet.
   630 		   Device shutdown has taken care of setting the
   631 		   clean bits if RAIDF_INITED is not set;
   632 		   mark things as clean... */
633 #if 0
634 printf("Last one on raid%d. Updating status.\n",unit);
635 #endif
636 rf_update_component_labels(raidPtrs[unit],
637 RF_FINAL_COMPONENT_UPDATE);
638 if (doing_shutdown) {
639 /* last one, and we're going down, so
640 lights out for this RAID set too. */
641 error = rf_Shutdown(raidPtrs[unit]);
642
643 /* It's no longer initialized... */
644 rs->sc_flags &= ~RAIDF_INITED;
645
646 /* Detach the disk. */
647 disk_detach(&rs->sc_dkdev);
648 }
649 }
650
651 raidunlock(rs);
652 return (0);
653
654 }
655
656 void
657 raidstrategy(bp)
658 struct buf *bp;
659 {
660 int s;
661
662 unsigned int raidID = raidunit(bp->b_dev);
663 RF_Raid_t *raidPtr;
664 struct raid_softc *rs = &raid_softc[raidID];
665 struct disklabel *lp;
666 int wlabel;
667
668 if ((rs->sc_flags & RAIDF_INITED) ==0) {
669 bp->b_error = ENXIO;
670 bp->b_flags |= B_ERROR;
671 bp->b_resid = bp->b_bcount;
672 biodone(bp);
673 return;
674 }
675 if (raidID >= numraid || !raidPtrs[raidID]) {
676 bp->b_error = ENODEV;
677 bp->b_flags |= B_ERROR;
678 bp->b_resid = bp->b_bcount;
679 biodone(bp);
680 return;
681 }
682 raidPtr = raidPtrs[raidID];
683 if (!raidPtr->valid) {
684 bp->b_error = ENODEV;
685 bp->b_flags |= B_ERROR;
686 bp->b_resid = bp->b_bcount;
687 biodone(bp);
688 return;
689 }
690 if (bp->b_bcount == 0) {
691 db1_printf(("b_bcount is zero..\n"));
692 biodone(bp);
693 return;
694 }
695 lp = rs->sc_dkdev.dk_label;
696
697 /*
698 * Do bounds checking and adjust transfer. If there's an
699 * error, the bounds check will flag that for us.
700 */
701
702 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
703 if (DISKPART(bp->b_dev) != RAW_PART)
704 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
705 db1_printf(("Bounds check failed!!:%d %d\n",
706 (int) bp->b_blkno, (int) wlabel));
707 biodone(bp);
708 return;
709 }
710 s = splbio();
711
712 bp->b_resid = 0;
713
714 /* stuff it onto our queue */
715 BUFQ_PUT(&rs->buf_queue, bp);
716
717 raidstart(raidPtrs[raidID]);
718
719 splx(s);
720 }
721 /* ARGSUSED */
722 int
723 raidread(dev, uio, flags)
724 dev_t dev;
725 struct uio *uio;
726 int flags;
727 {
728 int unit = raidunit(dev);
729 struct raid_softc *rs;
730 int part;
731
732 if (unit >= numraid)
733 return (ENXIO);
734 rs = &raid_softc[unit];
735
736 if ((rs->sc_flags & RAIDF_INITED) == 0)
737 return (ENXIO);
738 part = DISKPART(dev);
739
740 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
741
742 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
743
744 }
745 /* ARGSUSED */
746 int
747 raidwrite(dev, uio, flags)
748 dev_t dev;
749 struct uio *uio;
750 int flags;
751 {
752 int unit = raidunit(dev);
753 struct raid_softc *rs;
754
755 if (unit >= numraid)
756 return (ENXIO);
757 rs = &raid_softc[unit];
758
759 if ((rs->sc_flags & RAIDF_INITED) == 0)
760 return (ENXIO);
761 db1_printf(("raidwrite\n"));
762 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
763
764 }
765
766 int
767 raidioctl(dev, cmd, data, flag, p)
768 dev_t dev;
769 u_long cmd;
770 caddr_t data;
771 int flag;
772 struct proc *p;
773 {
774 int unit = raidunit(dev);
775 int error = 0;
776 int part, pmask;
777 struct raid_softc *rs;
778 RF_Config_t *k_cfg, *u_cfg;
779 RF_Raid_t *raidPtr;
780 RF_RaidDisk_t *diskPtr;
781 RF_AccTotals_t *totals;
782 RF_DeviceConfig_t *d_cfg, **ucfgp;
783 u_char *specific_buf;
784 int retcode = 0;
785 int row;
786 int column;
787 int raidid;
788 struct rf_recon_req *rrcopy, *rr;
789 RF_ComponentLabel_t *clabel;
790 RF_ComponentLabel_t ci_label;
791 RF_ComponentLabel_t **clabel_ptr;
792 RF_SingleComponent_t *sparePtr,*componentPtr;
793 RF_SingleComponent_t hot_spare;
794 RF_SingleComponent_t component;
795 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
796 int i, j, d;
797 #ifdef __HAVE_OLD_DISKLABEL
798 struct disklabel newlabel;
799 #endif
800
801 if (unit >= numraid)
802 return (ENXIO);
803 rs = &raid_softc[unit];
804 raidPtr = raidPtrs[unit];
805
806 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
807 (int) DISKPART(dev), (int) unit, (int) cmd));
808
809 /* Must be open for writes for these commands... */
810 switch (cmd) {
811 case DIOCSDINFO:
812 case DIOCWDINFO:
813 #ifdef __HAVE_OLD_DISKLABEL
814 case ODIOCWDINFO:
815 case ODIOCSDINFO:
816 #endif
817 case DIOCWLABEL:
818 if ((flag & FWRITE) == 0)
819 return (EBADF);
820 }
821
822 /* Must be initialized for these... */
823 switch (cmd) {
824 case DIOCGDINFO:
825 case DIOCSDINFO:
826 case DIOCWDINFO:
827 #ifdef __HAVE_OLD_DISKLABEL
828 case ODIOCGDINFO:
829 case ODIOCWDINFO:
830 case ODIOCSDINFO:
831 case ODIOCGDEFLABEL:
832 #endif
833 case DIOCGPART:
834 case DIOCWLABEL:
835 case DIOCGDEFLABEL:
836 case RAIDFRAME_SHUTDOWN:
837 case RAIDFRAME_REWRITEPARITY:
838 case RAIDFRAME_GET_INFO:
839 case RAIDFRAME_RESET_ACCTOTALS:
840 case RAIDFRAME_GET_ACCTOTALS:
841 case RAIDFRAME_KEEP_ACCTOTALS:
842 case RAIDFRAME_GET_SIZE:
843 case RAIDFRAME_FAIL_DISK:
844 case RAIDFRAME_COPYBACK:
845 case RAIDFRAME_CHECK_RECON_STATUS:
846 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
847 case RAIDFRAME_GET_COMPONENT_LABEL:
848 case RAIDFRAME_SET_COMPONENT_LABEL:
849 case RAIDFRAME_ADD_HOT_SPARE:
850 case RAIDFRAME_REMOVE_HOT_SPARE:
851 case RAIDFRAME_INIT_LABELS:
852 case RAIDFRAME_REBUILD_IN_PLACE:
853 case RAIDFRAME_CHECK_PARITY:
854 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
855 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
856 case RAIDFRAME_CHECK_COPYBACK_STATUS:
857 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
858 case RAIDFRAME_SET_AUTOCONFIG:
859 case RAIDFRAME_SET_ROOT:
860 case RAIDFRAME_DELETE_COMPONENT:
861 case RAIDFRAME_INCORPORATE_HOT_SPARE:
862 if ((rs->sc_flags & RAIDF_INITED) == 0)
863 return (ENXIO);
864 }
865
866 switch (cmd) {
867
868 /* configure the system */
869 case RAIDFRAME_CONFIGURE:
870
871 if (raidPtr->valid) {
872 /* There is a valid RAID set running on this unit! */
873 printf("raid%d: Device already configured!\n",unit);
874 return(EINVAL);
875 }
876
877 /* copy-in the configuration information */
878 /* data points to a pointer to the configuration structure */
879
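		/*
		 * From userland (raidctl, for instance) this ioctl is
		 * expected to be handed the address of a pointer to the
		 * configuration, since "data" is dereferenced once below to
		 * find the user's RF_Config_t.  A minimal sketch of the
		 * calling side, with the fd and variable names purely
		 * illustrative:
		 *
		 *	RF_Config_t cfg;		filled in by the caller
		 *	RF_Config_t *cfg_ptr = &cfg;
		 *
		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_ptr);
		 */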
880 u_cfg = *((RF_Config_t **) data);
881 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
882 if (k_cfg == NULL) {
883 return (ENOMEM);
884 }
885 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
886 sizeof(RF_Config_t));
887 if (retcode) {
888 RF_Free(k_cfg, sizeof(RF_Config_t));
889 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
890 retcode));
891 return (retcode);
892 }
893 /* allocate a buffer for the layout-specific data, and copy it
894 * in */
895 if (k_cfg->layoutSpecificSize) {
896 if (k_cfg->layoutSpecificSize > 10000) {
897 /* sanity check */
898 RF_Free(k_cfg, sizeof(RF_Config_t));
899 return (EINVAL);
900 }
901 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
902 (u_char *));
903 if (specific_buf == NULL) {
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (ENOMEM);
906 }
907 retcode = copyin(k_cfg->layoutSpecific,
908 (caddr_t) specific_buf,
909 k_cfg->layoutSpecificSize);
910 if (retcode) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 RF_Free(specific_buf,
913 k_cfg->layoutSpecificSize);
914 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
915 retcode));
916 return (retcode);
917 }
918 } else
919 specific_buf = NULL;
920 k_cfg->layoutSpecific = specific_buf;
921
922 /* should do some kind of sanity check on the configuration.
923 * Store the sum of all the bytes in the last byte? */
924
925 /* configure the system */
926
927 /*
928 * Clear the entire RAID descriptor, just to make sure
929 * there is no stale data left in the case of a
930 * reconfiguration
931 */
932 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
933 raidPtr->raidid = unit;
934
935 retcode = rf_Configure(raidPtr, k_cfg, NULL);
936
937 if (retcode == 0) {
938
939 /* allow this many simultaneous IO's to
940 this RAID device */
941 raidPtr->openings = RAIDOUTSTANDING;
942
943 raidinit(raidPtr);
944 rf_markalldirty(raidPtr);
945 }
946 /* free the buffers. No return code here. */
947 if (k_cfg->layoutSpecificSize) {
948 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
949 }
950 RF_Free(k_cfg, sizeof(RF_Config_t));
951
952 return (retcode);
953
954 /* shutdown the system */
955 case RAIDFRAME_SHUTDOWN:
956
957 if ((error = raidlock(rs)) != 0)
958 return (error);
959
960 /*
961 * If somebody has a partition mounted, we shouldn't
962 * shutdown.
963 */
964
965 part = DISKPART(dev);
966 pmask = (1 << part);
967 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
968 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
969 (rs->sc_dkdev.dk_copenmask & pmask))) {
970 raidunlock(rs);
971 return (EBUSY);
972 }
973
974 retcode = rf_Shutdown(raidPtr);
975
976 /* It's no longer initialized... */
977 rs->sc_flags &= ~RAIDF_INITED;
978
979 /* Detach the disk. */
980 disk_detach(&rs->sc_dkdev);
981
982 raidunlock(rs);
983
984 return (retcode);
985 case RAIDFRAME_GET_COMPONENT_LABEL:
986 clabel_ptr = (RF_ComponentLabel_t **) data;
987 /* need to read the component label for the disk indicated
988 by row,column in clabel */
989
   990 		/* For practice, let's get it directly from disk, rather
991 than from the in-core copy */
992 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
993 (RF_ComponentLabel_t *));
994 if (clabel == NULL)
995 return (ENOMEM);
996
997 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
998
999 retcode = copyin( *clabel_ptr, clabel,
1000 sizeof(RF_ComponentLabel_t));
1001
1002 if (retcode) {
1003 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1004 return(retcode);
1005 }
1006
1007 row = clabel->row;
1008 column = clabel->column;
1009
1010 if ((row < 0) || (row >= raidPtr->numRow) ||
1011 (column < 0) || (column >= raidPtr->numCol +
1012 raidPtr->numSpare)) {
1013 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1014 return(EINVAL);
1015 }
1016
1017 raidread_component_label(raidPtr->Disks[row][column].dev,
1018 raidPtr->raid_cinfo[row][column].ci_vp,
1019 clabel );
1020
1021 retcode = copyout((caddr_t) clabel,
1022 (caddr_t) *clabel_ptr,
1023 sizeof(RF_ComponentLabel_t));
1024 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1025 return (retcode);
1026
1027 case RAIDFRAME_SET_COMPONENT_LABEL:
1028 clabel = (RF_ComponentLabel_t *) data;
1029
1030 /* XXX check the label for valid stuff... */
1031 /* Note that some things *should not* get modified --
1032 the user should be re-initing the labels instead of
1033 trying to patch things.
1034 */
1035
1036 raidid = raidPtr->raidid;
1037 printf("raid%d: Got component label:\n", raidid);
1038 printf("raid%d: Version: %d\n", raidid, clabel->version);
1039 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1040 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1041 printf("raid%d: Row: %d\n", raidid, clabel->row);
1042 printf("raid%d: Column: %d\n", raidid, clabel->column);
1043 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1044 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1045 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1046 printf("raid%d: Status: %d\n", raidid, clabel->status);
1047
1048 row = clabel->row;
1049 column = clabel->column;
1050
1051 if ((row < 0) || (row >= raidPtr->numRow) ||
1052 (column < 0) || (column >= raidPtr->numCol)) {
1053 return(EINVAL);
1054 }
1055
1056 /* XXX this isn't allowed to do anything for now :-) */
1057
1058 /* XXX and before it is, we need to fill in the rest
1059 of the fields!?!?!?! */
1060 #if 0
1061 raidwrite_component_label(
1062 raidPtr->Disks[row][column].dev,
1063 raidPtr->raid_cinfo[row][column].ci_vp,
1064 clabel );
1065 #endif
1066 return (0);
1067
1068 case RAIDFRAME_INIT_LABELS:
1069 clabel = (RF_ComponentLabel_t *) data;
1070 /*
1071 we only want the serial number from
1072 the above. We get all the rest of the information
1073 from the config that was used to create this RAID
1074 set.
1075 */
1076
1077 raidPtr->serial_number = clabel->serial_number;
1078
1079 raid_init_component_label(raidPtr, &ci_label);
1080 ci_label.serial_number = clabel->serial_number;
1081
1082 for(row=0;row<raidPtr->numRow;row++) {
1083 ci_label.row = row;
1084 for(column=0;column<raidPtr->numCol;column++) {
1085 diskPtr = &raidPtr->Disks[row][column];
1086 if (!RF_DEAD_DISK(diskPtr->status)) {
1087 ci_label.partitionSize = diskPtr->partitionSize;
1088 ci_label.column = column;
1089 raidwrite_component_label(
1090 raidPtr->Disks[row][column].dev,
1091 raidPtr->raid_cinfo[row][column].ci_vp,
1092 &ci_label );
1093 }
1094 }
1095 }
1096
1097 return (retcode);
1098 case RAIDFRAME_SET_AUTOCONFIG:
1099 d = rf_set_autoconfig(raidPtr, *(int *) data);
1100 printf("raid%d: New autoconfig value is: %d\n",
1101 raidPtr->raidid, d);
1102 *(int *) data = d;
1103 return (retcode);
1104
1105 case RAIDFRAME_SET_ROOT:
1106 d = rf_set_rootpartition(raidPtr, *(int *) data);
1107 printf("raid%d: New rootpartition value is: %d\n",
1108 raidPtr->raidid, d);
1109 *(int *) data = d;
1110 return (retcode);
1111
1112 /* initialize all parity */
1113 case RAIDFRAME_REWRITEPARITY:
1114
1115 if (raidPtr->Layout.map->faultsTolerated == 0) {
1116 /* Parity for RAID 0 is trivially correct */
1117 raidPtr->parity_good = RF_RAID_CLEAN;
1118 return(0);
1119 }
1120
1121 if (raidPtr->parity_rewrite_in_progress == 1) {
1122 /* Re-write is already in progress! */
1123 return(EINVAL);
1124 }
1125
1126 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1127 rf_RewriteParityThread,
1128 raidPtr,"raid_parity");
1129 return (retcode);
1130
1131
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 sparePtr = (RF_SingleComponent_t *) data;
1134 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1135 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1136 return(retcode);
1137
1138 case RAIDFRAME_REMOVE_HOT_SPARE:
1139 return(retcode);
1140
1141 case RAIDFRAME_DELETE_COMPONENT:
1142 componentPtr = (RF_SingleComponent_t *)data;
1143 memcpy( &component, componentPtr,
1144 sizeof(RF_SingleComponent_t));
1145 retcode = rf_delete_component(raidPtr, &component);
1146 return(retcode);
1147
1148 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1149 componentPtr = (RF_SingleComponent_t *)data;
1150 memcpy( &component, componentPtr,
1151 sizeof(RF_SingleComponent_t));
1152 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1153 return(retcode);
1154
1155 case RAIDFRAME_REBUILD_IN_PLACE:
1156
1157 if (raidPtr->Layout.map->faultsTolerated == 0) {
1158 /* Can't do this on a RAID 0!! */
1159 return(EINVAL);
1160 }
1161
1162 if (raidPtr->recon_in_progress == 1) {
1163 /* a reconstruct is already in progress! */
1164 return(EINVAL);
1165 }
1166
1167 componentPtr = (RF_SingleComponent_t *) data;
1168 memcpy( &component, componentPtr,
1169 sizeof(RF_SingleComponent_t));
1170 row = component.row;
1171 column = component.column;
1172 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1173 row, column);
1174 if ((row < 0) || (row >= raidPtr->numRow) ||
1175 (column < 0) || (column >= raidPtr->numCol)) {
1176 return(EINVAL);
1177 }
1178
1179 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1180 if (rrcopy == NULL)
1181 return(ENOMEM);
1182
1183 rrcopy->raidPtr = (void *) raidPtr;
1184 rrcopy->row = row;
1185 rrcopy->col = column;
1186
1187 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1188 rf_ReconstructInPlaceThread,
1189 rrcopy,"raid_reconip");
1190 return(retcode);
1191
1192 case RAIDFRAME_GET_INFO:
1193 if (!raidPtr->valid)
1194 return (ENODEV);
1195 ucfgp = (RF_DeviceConfig_t **) data;
1196 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1197 (RF_DeviceConfig_t *));
1198 if (d_cfg == NULL)
1199 return (ENOMEM);
1200 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1201 d_cfg->rows = raidPtr->numRow;
1202 d_cfg->cols = raidPtr->numCol;
1203 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1204 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1205 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1206 return (ENOMEM);
1207 }
1208 d_cfg->nspares = raidPtr->numSpare;
1209 if (d_cfg->nspares >= RF_MAX_DISKS) {
1210 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1211 return (ENOMEM);
1212 }
1213 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1214 d = 0;
1215 for (i = 0; i < d_cfg->rows; i++) {
1216 for (j = 0; j < d_cfg->cols; j++) {
1217 d_cfg->devs[d] = raidPtr->Disks[i][j];
1218 d++;
1219 }
1220 }
1221 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1222 d_cfg->spares[i] = raidPtr->Disks[0][j];
1223 }
1224 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1225 sizeof(RF_DeviceConfig_t));
1226 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1227
1228 return (retcode);
1229
1230 case RAIDFRAME_CHECK_PARITY:
1231 *(int *) data = raidPtr->parity_good;
1232 return (0);
1233
1234 case RAIDFRAME_RESET_ACCTOTALS:
1235 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1236 return (0);
1237
1238 case RAIDFRAME_GET_ACCTOTALS:
1239 totals = (RF_AccTotals_t *) data;
1240 *totals = raidPtr->acc_totals;
1241 return (0);
1242
1243 case RAIDFRAME_KEEP_ACCTOTALS:
1244 raidPtr->keep_acc_totals = *(int *)data;
1245 return (0);
1246
1247 case RAIDFRAME_GET_SIZE:
1248 *(int *) data = raidPtr->totalSectors;
1249 return (0);
1250
1251 /* fail a disk & optionally start reconstruction */
1252 case RAIDFRAME_FAIL_DISK:
1253
1254 if (raidPtr->Layout.map->faultsTolerated == 0) {
1255 /* Can't do this on a RAID 0!! */
1256 return(EINVAL);
1257 }
1258
1259 rr = (struct rf_recon_req *) data;
1260
1261 if (rr->row < 0 || rr->row >= raidPtr->numRow
1262 || rr->col < 0 || rr->col >= raidPtr->numCol)
1263 return (EINVAL);
1264
1265 printf("raid%d: Failing the disk: row: %d col: %d\n",
1266 unit, rr->row, rr->col);
1267
1268 /* make a copy of the recon request so that we don't rely on
1269 * the user's buffer */
1270 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1271 if (rrcopy == NULL)
1272 return(ENOMEM);
1273 memcpy(rrcopy, rr, sizeof(*rr));
1274 rrcopy->raidPtr = (void *) raidPtr;
1275
1276 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1277 rf_ReconThread,
1278 rrcopy,"raid_recon");
1279 return (0);
1280
1281 /* invoke a copyback operation after recon on whatever disk
1282 * needs it, if any */
1283 case RAIDFRAME_COPYBACK:
1284
1285 if (raidPtr->Layout.map->faultsTolerated == 0) {
1286 /* This makes no sense on a RAID 0!! */
1287 return(EINVAL);
1288 }
1289
1290 if (raidPtr->copyback_in_progress == 1) {
1291 /* Copyback is already in progress! */
1292 return(EINVAL);
1293 }
1294
1295 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1296 rf_CopybackThread,
1297 raidPtr,"raid_copyback");
1298 return (retcode);
1299
1300 /* return the percentage completion of reconstruction */
1301 case RAIDFRAME_CHECK_RECON_STATUS:
1302 if (raidPtr->Layout.map->faultsTolerated == 0) {
1303 /* This makes no sense on a RAID 0, so tell the
1304 user it's done. */
1305 *(int *) data = 100;
1306 return(0);
1307 }
1308 row = 0; /* XXX we only consider a single row... */
1309 if (raidPtr->status[row] != rf_rs_reconstructing)
1310 *(int *) data = 100;
1311 else
1312 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1313 return (0);
1314 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1315 progressInfoPtr = (RF_ProgressInfo_t **) data;
1316 row = 0; /* XXX we only consider a single row... */
1317 if (raidPtr->status[row] != rf_rs_reconstructing) {
1318 progressInfo.remaining = 0;
1319 progressInfo.completed = 100;
1320 progressInfo.total = 100;
1321 } else {
1322 progressInfo.total =
1323 raidPtr->reconControl[row]->numRUsTotal;
1324 progressInfo.completed =
1325 raidPtr->reconControl[row]->numRUsComplete;
1326 progressInfo.remaining = progressInfo.total -
1327 progressInfo.completed;
1328 }
1329 retcode = copyout((caddr_t) &progressInfo,
1330 (caddr_t) *progressInfoPtr,
1331 sizeof(RF_ProgressInfo_t));
1332 return (retcode);
1333
1334 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1335 if (raidPtr->Layout.map->faultsTolerated == 0) {
1336 /* This makes no sense on a RAID 0, so tell the
1337 user it's done. */
1338 *(int *) data = 100;
1339 return(0);
1340 }
1341 if (raidPtr->parity_rewrite_in_progress == 1) {
1342 *(int *) data = 100 *
1343 raidPtr->parity_rewrite_stripes_done /
1344 raidPtr->Layout.numStripe;
1345 } else {
1346 *(int *) data = 100;
1347 }
1348 return (0);
1349
1350 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1351 progressInfoPtr = (RF_ProgressInfo_t **) data;
1352 if (raidPtr->parity_rewrite_in_progress == 1) {
1353 progressInfo.total = raidPtr->Layout.numStripe;
1354 progressInfo.completed =
1355 raidPtr->parity_rewrite_stripes_done;
1356 progressInfo.remaining = progressInfo.total -
1357 progressInfo.completed;
1358 } else {
1359 progressInfo.remaining = 0;
1360 progressInfo.completed = 100;
1361 progressInfo.total = 100;
1362 }
1363 retcode = copyout((caddr_t) &progressInfo,
1364 (caddr_t) *progressInfoPtr,
1365 sizeof(RF_ProgressInfo_t));
1366 return (retcode);
1367
1368 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1369 if (raidPtr->Layout.map->faultsTolerated == 0) {
1370 /* This makes no sense on a RAID 0 */
1371 *(int *) data = 100;
1372 return(0);
1373 }
1374 if (raidPtr->copyback_in_progress == 1) {
1375 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1376 raidPtr->Layout.numStripe;
1377 } else {
1378 *(int *) data = 100;
1379 }
1380 return (0);
1381
1382 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1383 progressInfoPtr = (RF_ProgressInfo_t **) data;
1384 if (raidPtr->copyback_in_progress == 1) {
1385 progressInfo.total = raidPtr->Layout.numStripe;
1386 progressInfo.completed =
1387 raidPtr->copyback_stripes_done;
1388 progressInfo.remaining = progressInfo.total -
1389 progressInfo.completed;
1390 } else {
1391 progressInfo.remaining = 0;
1392 progressInfo.completed = 100;
1393 progressInfo.total = 100;
1394 }
1395 retcode = copyout((caddr_t) &progressInfo,
1396 (caddr_t) *progressInfoPtr,
1397 sizeof(RF_ProgressInfo_t));
1398 return (retcode);
1399
1400 /* the sparetable daemon calls this to wait for the kernel to
1401 * need a spare table. this ioctl does not return until a
1402 * spare table is needed. XXX -- calling mpsleep here in the
1403 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1404 * -- I should either compute the spare table in the kernel,
1405 * or have a different -- XXX XXX -- interface (a different
1406 * character device) for delivering the table -- XXX */
1407 #if 0
1408 case RAIDFRAME_SPARET_WAIT:
1409 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1410 while (!rf_sparet_wait_queue)
1411 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1412 waitreq = rf_sparet_wait_queue;
1413 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1414 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1415
1416 /* structure assignment */
1417 *((RF_SparetWait_t *) data) = *waitreq;
1418
1419 RF_Free(waitreq, sizeof(*waitreq));
1420 return (0);
1421
1422 /* wakes up a process waiting on SPARET_WAIT and puts an error
  1423 	 * code in it that will cause the daemon to exit */
1424 case RAIDFRAME_ABORT_SPARET_WAIT:
1425 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1426 waitreq->fcol = -1;
1427 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1428 waitreq->next = rf_sparet_wait_queue;
1429 rf_sparet_wait_queue = waitreq;
1430 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1431 wakeup(&rf_sparet_wait_queue);
1432 return (0);
1433
1434 /* used by the spare table daemon to deliver a spare table
1435 * into the kernel */
1436 case RAIDFRAME_SEND_SPARET:
1437
1438 /* install the spare table */
1439 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1440
1441 /* respond to the requestor. the return status of the spare
1442 * table installation is passed in the "fcol" field */
1443 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1444 waitreq->fcol = retcode;
1445 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1446 waitreq->next = rf_sparet_resp_queue;
1447 rf_sparet_resp_queue = waitreq;
1448 wakeup(&rf_sparet_resp_queue);
1449 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1450
1451 return (retcode);
1452 #endif
1453
1454 default:
1455 break; /* fall through to the os-specific code below */
1456
1457 }
1458
1459 if (!raidPtr->valid)
1460 return (EINVAL);
1461
1462 /*
1463 * Add support for "regular" device ioctls here.
1464 */
1465
1466 switch (cmd) {
1467 case DIOCGDINFO:
1468 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1469 break;
1470 #ifdef __HAVE_OLD_DISKLABEL
1471 case ODIOCGDINFO:
1472 newlabel = *(rs->sc_dkdev.dk_label);
1473 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1474 return ENOTTY;
1475 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1476 break;
1477 #endif
1478
1479 case DIOCGPART:
1480 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1481 ((struct partinfo *) data)->part =
1482 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1483 break;
1484
1485 case DIOCWDINFO:
1486 case DIOCSDINFO:
1487 #ifdef __HAVE_OLD_DISKLABEL
1488 case ODIOCWDINFO:
1489 case ODIOCSDINFO:
1490 #endif
1491 {
1492 struct disklabel *lp;
1493 #ifdef __HAVE_OLD_DISKLABEL
1494 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1495 memset(&newlabel, 0, sizeof newlabel);
1496 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1497 lp = &newlabel;
1498 } else
1499 #endif
1500 lp = (struct disklabel *)data;
1501
1502 if ((error = raidlock(rs)) != 0)
1503 return (error);
1504
1505 rs->sc_flags |= RAIDF_LABELLING;
1506
1507 error = setdisklabel(rs->sc_dkdev.dk_label,
1508 lp, 0, rs->sc_dkdev.dk_cpulabel);
1509 if (error == 0) {
1510 if (cmd == DIOCWDINFO
1511 #ifdef __HAVE_OLD_DISKLABEL
1512 || cmd == ODIOCWDINFO
1513 #endif
1514 )
1515 error = writedisklabel(RAIDLABELDEV(dev),
1516 raidstrategy, rs->sc_dkdev.dk_label,
1517 rs->sc_dkdev.dk_cpulabel);
1518 }
1519 rs->sc_flags &= ~RAIDF_LABELLING;
1520
1521 raidunlock(rs);
1522
1523 if (error)
1524 return (error);
1525 break;
1526 }
1527
1528 case DIOCWLABEL:
1529 if (*(int *) data != 0)
1530 rs->sc_flags |= RAIDF_WLABEL;
1531 else
1532 rs->sc_flags &= ~RAIDF_WLABEL;
1533 break;
1534
1535 case DIOCGDEFLABEL:
1536 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1537 break;
1538
1539 #ifdef __HAVE_OLD_DISKLABEL
1540 case ODIOCGDEFLABEL:
1541 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1542 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1543 return ENOTTY;
1544 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1545 break;
1546 #endif
1547
1548 default:
1549 retcode = ENOTTY;
1550 }
1551 return (retcode);
1552
1553 }
1554
1555
1556 /* raidinit -- complete the rest of the initialization for the
1557 RAIDframe device. */
1558
1559
1560 static void
1561 raidinit(raidPtr)
1562 RF_Raid_t *raidPtr;
1563 {
1564 struct raid_softc *rs;
1565 int unit;
1566
1567 unit = raidPtr->raidid;
1568
1569 rs = &raid_softc[unit];
1570
1571 /* XXX should check return code first... */
1572 rs->sc_flags |= RAIDF_INITED;
1573
1574 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1575
1576 rs->sc_dkdev.dk_name = rs->sc_xname;
1577
1578 /* disk_attach actually creates space for the CPU disklabel, among
1579 * other things, so it's critical to call this *BEFORE* we try putzing
1580 * with disklabels. */
1581
1582 disk_attach(&rs->sc_dkdev);
1583
1584 /* XXX There may be a weird interaction here between this, and
1585 * protectedSectors, as used in RAIDframe. */
1586
1587 rs->sc_size = raidPtr->totalSectors;
1588
1589 }
1590
1591 /* wake up the daemon & tell it to get us a spare table
1592 * XXX
1593 * the entries in the queues should be tagged with the raidPtr
1594 * so that in the extremely rare case that two recons happen at once,
  1595  * we know for which device we're requesting a spare table
1596 * XXX
1597 *
1598 * XXX This code is not currently used. GO
1599 */
1600 int
1601 rf_GetSpareTableFromDaemon(req)
1602 RF_SparetWait_t *req;
1603 {
1604 int retcode;
1605
1606 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1607 req->next = rf_sparet_wait_queue;
1608 rf_sparet_wait_queue = req;
1609 wakeup(&rf_sparet_wait_queue);
1610
1611 /* mpsleep unlocks the mutex */
1612 while (!rf_sparet_resp_queue) {
1613 tsleep(&rf_sparet_resp_queue, PRIBIO,
1614 "raidframe getsparetable", 0);
1615 }
1616 req = rf_sparet_resp_queue;
1617 rf_sparet_resp_queue = req->next;
1618 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1619
1620 retcode = req->fcol;
1621 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1622 * alloc'd */
1623 return (retcode);
1624 }
1625
1626 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1627 * bp & passes it down.
1628 * any calls originating in the kernel must use non-blocking I/O
1629 * do some extra sanity checking to return "appropriate" error values for
1630 * certain conditions (to make some standard utilities work)
1631 *
1632 * Formerly known as: rf_DoAccessKernel
1633 */
1634 void
1635 raidstart(raidPtr)
1636 RF_Raid_t *raidPtr;
1637 {
1638 RF_SectorCount_t num_blocks, pb, sum;
1639 RF_RaidAddr_t raid_addr;
1640 int retcode;
1641 struct partition *pp;
1642 daddr_t blocknum;
1643 int unit;
1644 struct raid_softc *rs;
1645 int do_async;
1646 struct buf *bp;
1647
1648 unit = raidPtr->raidid;
1649 rs = &raid_softc[unit];
1650
1651 /* quick check to see if anything has died recently */
1652 RF_LOCK_MUTEX(raidPtr->mutex);
1653 if (raidPtr->numNewFailures > 0) {
1654 rf_update_component_labels(raidPtr,
1655 RF_NORMAL_COMPONENT_UPDATE);
1656 raidPtr->numNewFailures--;
1657 }
1658
1659 /* Check to see if we're at the limit... */
1660 while (raidPtr->openings > 0) {
1661 RF_UNLOCK_MUTEX(raidPtr->mutex);
1662
1663 /* get the next item, if any, from the queue */
1664 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1665 /* nothing more to do */
1666 return;
1667 }
1668
1669 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1670 * partition.. Need to make it absolute to the underlying
1671 * device.. */
1672
1673 blocknum = bp->b_blkno;
1674 if (DISKPART(bp->b_dev) != RAW_PART) {
1675 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1676 blocknum += pp->p_offset;
1677 }
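		/* For example (numbers purely illustrative): a read at
		 * block 100 of a partition whose p_offset is 204800 is
		 * issued to RAID address 204900, while a request on the
		 * raw partition is used as-is. */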
1678
1679 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1680 (int) blocknum));
1681
1682 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1683 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1684
1685 /* *THIS* is where we adjust what block we're going to...
1686 * but DO NOT TOUCH bp->b_blkno!!! */
1687 raid_addr = blocknum;
1688
1689 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1690 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1691 sum = raid_addr + num_blocks + pb;
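		/*
		 * Example of the arithmetic above, assuming 512-byte
		 * sectors -- i.e. a logBytesPerSector of 9 and a sectorMask
		 * of 511 (an assumption for the example, not a requirement
		 * of the driver): a 64K transfer gives
		 * num_blocks = 65536 >> 9 = 128 and pb = 0, while an
		 * unaligned 1000-byte transfer gives num_blocks = 1 and
		 * pb = 1, so "sum" rounds the request up to whole sectors
		 * before the bounds check below.
		 */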
1692 if (1 || rf_debugKernelAccess) {
1693 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1694 (int) raid_addr, (int) sum, (int) num_blocks,
1695 (int) pb, (int) bp->b_resid));
1696 }
1697 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1698 || (sum < num_blocks) || (sum < pb)) {
1699 bp->b_error = ENOSPC;
1700 bp->b_flags |= B_ERROR;
1701 bp->b_resid = bp->b_bcount;
1702 biodone(bp);
1703 RF_LOCK_MUTEX(raidPtr->mutex);
1704 continue;
1705 }
1706 /*
1707 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1708 */
1709
1710 if (bp->b_bcount & raidPtr->sectorMask) {
1711 bp->b_error = EINVAL;
1712 bp->b_flags |= B_ERROR;
1713 bp->b_resid = bp->b_bcount;
1714 biodone(bp);
1715 RF_LOCK_MUTEX(raidPtr->mutex);
1716 continue;
1717
1718 }
1719 db1_printf(("Calling DoAccess..\n"));
1720
1721
1722 RF_LOCK_MUTEX(raidPtr->mutex);
1723 raidPtr->openings--;
1724 RF_UNLOCK_MUTEX(raidPtr->mutex);
1725
1726 /*
1727 * Everything is async.
1728 */
1729 do_async = 1;
1730
1731 disk_busy(&rs->sc_dkdev);
1732
1733 /* XXX we're still at splbio() here... do we *really*
1734 need to be? */
1735
1736 /* don't ever condition on bp->b_flags & B_WRITE.
1737 * always condition on B_READ instead */
1738
1739 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1740 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1741 do_async, raid_addr, num_blocks,
1742 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1743
1744 RF_LOCK_MUTEX(raidPtr->mutex);
1745 }
1746 RF_UNLOCK_MUTEX(raidPtr->mutex);
1747 }
1748
1749
1750
1751
1752 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1753
1754 int
1755 rf_DispatchKernelIO(queue, req)
1756 RF_DiskQueue_t *queue;
1757 RF_DiskQueueData_t *req;
1758 {
1759 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1760 struct buf *bp;
1761 struct raidbuf *raidbp = NULL;
1762 struct raid_softc *rs;
1763 int unit;
1764 int s;
1765
1766 s=0;
1767 /* s = splbio();*/ /* want to test this */
1768 /* XXX along with the vnode, we also need the softc associated with
1769 * this device.. */
1770
1771 req->queue = queue;
1772
1773 unit = queue->raidPtr->raidid;
1774
1775 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1776
1777 if (unit >= numraid) {
1778 printf("Invalid unit number: %d %d\n", unit, numraid);
1779 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1780 }
1781 rs = &raid_softc[unit];
1782
1783 bp = req->bp;
1784 #if 1
1785 /* XXX when there is a physical disk failure, someone is passing us a
1786 * buffer that contains old stuff!! Attempt to deal with this problem
1787 * without taking a performance hit... (not sure where the real bug
1788 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1789
1790 if (bp->b_flags & B_ERROR) {
1791 bp->b_flags &= ~B_ERROR;
1792 }
1793 if (bp->b_error != 0) {
1794 bp->b_error = 0;
1795 }
1796 #endif
1797 raidbp = RAIDGETBUF(rs);
1798
1799 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1800
1801 /*
1802 * context for raidiodone
1803 */
1804 raidbp->rf_obp = bp;
1805 raidbp->req = req;
1806
1807 LIST_INIT(&raidbp->rf_buf.b_dep);
1808
1809 switch (req->type) {
1810 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1811 /* XXX need to do something extra here.. */
1812 /* I'm leaving this in, as I've never actually seen it used,
1813 * and I'd like folks to report it... GO */
1814 printf(("WAKEUP CALLED\n"));
1815 queue->numOutstanding++;
1816
1817 /* XXX need to glue the original buffer into this?? */
1818
1819 KernelWakeupFunc(&raidbp->rf_buf);
1820 break;
1821
1822 case RF_IO_TYPE_READ:
1823 case RF_IO_TYPE_WRITE:
1824
1825 if (req->tracerec) {
1826 RF_ETIMER_START(req->tracerec->timer);
1827 }
1828 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1829 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1830 req->sectorOffset, req->numSector,
1831 req->buf, KernelWakeupFunc, (void *) req,
1832 queue->raidPtr->logBytesPerSector, req->b_proc);
1833
1834 if (rf_debugKernelAccess) {
1835 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1836 (long) bp->b_blkno));
1837 }
1838 queue->numOutstanding++;
1839 queue->last_deq_sector = req->sectorOffset;
1840 /* acc wouldn't have been let in if there were any pending
1841 * reqs at any other priority */
1842 queue->curPriority = req->priority;
1843
1844 db1_printf(("Going for %c to unit %d row %d col %d\n",
1845 req->type, unit, queue->row, queue->col));
1846 db1_printf(("sector %d count %d (%d bytes) %d\n",
1847 (int) req->sectorOffset, (int) req->numSector,
1848 (int) (req->numSector <<
1849 queue->raidPtr->logBytesPerSector),
1850 (int) queue->raidPtr->logBytesPerSector));
1851 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1852 raidbp->rf_buf.b_vp->v_numoutput++;
1853 }
1854 VOP_STRATEGY(&raidbp->rf_buf);
1855
1856 break;
1857
1858 default:
1859 panic("bad req->type in rf_DispatchKernelIO");
1860 }
1861 db1_printf(("Exiting from DispatchKernelIO\n"));
1862 /* splx(s); */ /* want to test this */
1863 return (0);
1864 }
  1865 /* this is the callback function associated with an I/O invoked from
1866 kernel code.
1867 */
1868 static void
1869 KernelWakeupFunc(vbp)
1870 struct buf *vbp;
1871 {
1872 RF_DiskQueueData_t *req = NULL;
1873 RF_DiskQueue_t *queue;
1874 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1875 struct buf *bp;
1876 struct raid_softc *rs;
1877 int unit;
1878 int s;
1879
1880 s = splbio();
1881 db1_printf(("recovering the request queue:\n"));
1882 req = raidbp->req;
1883
1884 bp = raidbp->rf_obp;
1885
1886 queue = (RF_DiskQueue_t *) req->queue;
1887
1888 if (raidbp->rf_buf.b_flags & B_ERROR) {
1889 bp->b_flags |= B_ERROR;
1890 bp->b_error = raidbp->rf_buf.b_error ?
1891 raidbp->rf_buf.b_error : EIO;
1892 }
1893
1894 /* XXX methinks this could be wrong... */
1895 #if 1
1896 bp->b_resid = raidbp->rf_buf.b_resid;
1897 #endif
1898
1899 if (req->tracerec) {
1900 RF_ETIMER_STOP(req->tracerec->timer);
1901 RF_ETIMER_EVAL(req->tracerec->timer);
1902 RF_LOCK_MUTEX(rf_tracing_mutex);
1903 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1904 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1905 req->tracerec->num_phys_ios++;
1906 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1907 }
1908 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1909
1910 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1911
1912
1913 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1914 * ballistic, and mark the component as hosed... */
1915
1916 if (bp->b_flags & B_ERROR) {
1917 /* Mark the disk as dead */
1918 /* but only mark it once... */
1919 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1920 rf_ds_optimal) {
1921 printf("raid%d: IO Error. Marking %s as failed.\n",
1922 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1923 queue->raidPtr->Disks[queue->row][queue->col].status =
1924 rf_ds_failed;
1925 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1926 queue->raidPtr->numFailures++;
1927 queue->raidPtr->numNewFailures++;
1928 } else { /* Disk is already dead... */
1929 /* printf("Disk already marked as dead!\n"); */
1930 }
1931
1932 }
1933
1934 rs = &raid_softc[unit];
1935 RAIDPUTBUF(rs, raidbp);
1936
1937 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1938 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1939
1940 splx(s);
1941 }
1942
1943
1944
1945 /*
1946 * initialize a buf structure for doing an I/O in the kernel.
1947 */
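/*
 * Note (descriptive, not normative): B_CALL is set below, so biodone()
 * will invoke cbFunc (KernelWakeupFunc in this driver) when the
 * component I/O completes.  cbArg is accepted but not stored in the
 * buf; the callback instead recovers its context from the enclosing
 * struct raidbuf set up by rf_DispatchKernelIO().
 */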
1948 static void
1949 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1950 logBytesPerSector, b_proc)
1951 struct buf *bp;
1952 struct vnode *b_vp;
1953 unsigned rw_flag;
1954 dev_t dev;
1955 RF_SectorNum_t startSect;
1956 RF_SectorCount_t numSect;
1957 caddr_t buf;
1958 void (*cbFunc) (struct buf *);
1959 void *cbArg;
1960 int logBytesPerSector;
1961 struct proc *b_proc;
1962 {
1963 /* bp->b_flags = B_PHYS | rw_flag; */
1964 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1965 bp->b_bcount = numSect << logBytesPerSector;
1966 bp->b_bufsize = bp->b_bcount;
1967 bp->b_error = 0;
1968 bp->b_dev = dev;
1969 bp->b_data = buf;
1970 bp->b_blkno = startSect;
1971 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1972 if (bp->b_bcount == 0) {
1973 panic("bp->b_bcount is zero in InitBP!!\n");
1974 }
1975 bp->b_proc = b_proc;
1976 bp->b_iodone = cbFunc;
1977 bp->b_vp = b_vp;
1978
1979 }
1980
1981 static void
1982 raidgetdefaultlabel(raidPtr, rs, lp)
1983 RF_Raid_t *raidPtr;
1984 struct raid_softc *rs;
1985 struct disklabel *lp;
1986 {
1987 db1_printf(("Building a default label...\n"));
1988 memset(lp, 0, sizeof(*lp));
1989
1990 /* fabricate a label... */
1991 lp->d_secperunit = raidPtr->totalSectors;
1992 lp->d_secsize = raidPtr->bytesPerSector;
1993 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1994 lp->d_ntracks = 4 * raidPtr->numCol;
1995 lp->d_ncylinders = raidPtr->totalSectors /
1996 (lp->d_nsectors * lp->d_ntracks);
1997 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
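	/*
	 * The geometry above is synthetic.  As a worked example (with a
	 * hypothetical 4-column set, 32 data sectors per stripe and
	 * 1000000 total sectors):
	 *	d_nsectors   = 32
	 *	d_ntracks    = 4 * 4 = 16
	 *	d_ncylinders = 1000000 / (32 * 16) = 1953
	 *	d_secpercyl  = 16 * 32 = 512
	 */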
1998
1999 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2000 lp->d_type = DTYPE_RAID;
2001 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2002 lp->d_rpm = 3600;
2003 lp->d_interleave = 1;
2004 lp->d_flags = 0;
2005
2006 lp->d_partitions[RAW_PART].p_offset = 0;
2007 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2008 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2009 lp->d_npartitions = RAW_PART + 1;
2010
2011 lp->d_magic = DISKMAGIC;
2012 lp->d_magic2 = DISKMAGIC;
2013 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2014
2015 }
2016 /*
2017 * Read the disklabel from the raid device. If one is not present, fake one
2018 * up.
2019 */
2020 static void
2021 raidgetdisklabel(dev)
2022 dev_t dev;
2023 {
2024 int unit = raidunit(dev);
2025 struct raid_softc *rs = &raid_softc[unit];
2026 char *errstring;
2027 struct disklabel *lp = rs->sc_dkdev.dk_label;
2028 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2029 RF_Raid_t *raidPtr;
2030
2031 db1_printf(("Getting the disklabel...\n"));
2032
2033 memset(clp, 0, sizeof(*clp));
2034
2035 raidPtr = raidPtrs[unit];
2036
2037 raidgetdefaultlabel(raidPtr, rs, lp);
2038
2039 /*
2040 * Call the generic disklabel extraction routine.
2041 */
2042 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2043 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2044 if (errstring)
2045 raidmakedisklabel(rs);
2046 else {
2047 int i;
2048 struct partition *pp;
2049
2050 /*
2051 * Sanity check whether the found disklabel is valid.
2052 *
2053 		 * This is necessary since the total size of the raid device
2054 		 * may vary when the interleave is changed, even though exactly
2055 		 * the same components are used, and an old disklabel may be
2056 		 * used if one is found.
2057 */
2058 if (lp->d_secperunit != rs->sc_size)
2059 printf("raid%d: WARNING: %s: "
2060 			    "total number of sectors in disklabel (%d) != "
2061 "the size of raid (%ld)\n", unit, rs->sc_xname,
2062 lp->d_secperunit, (long) rs->sc_size);
2063 for (i = 0; i < lp->d_npartitions; i++) {
2064 pp = &lp->d_partitions[i];
2065 if (pp->p_offset + pp->p_size > rs->sc_size)
2066 printf("raid%d: WARNING: %s: end of partition `%c' "
2067 "exceeds the size of raid (%ld)\n",
2068 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2069 }
2070 }
2071
2072 }
2073 /*
2074 * Take care of things one might want to take care of in the event
2075 * that a disklabel isn't present.
2076 */
2077 static void
2078 raidmakedisklabel(rs)
2079 struct raid_softc *rs;
2080 {
2081 struct disklabel *lp = rs->sc_dkdev.dk_label;
2082 db1_printf(("Making a label..\n"));
2083
2084 /*
2085 * For historical reasons, if there's no disklabel present
2086 * the raw partition must be marked FS_BSDFFS.
2087 */
2088
2089 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2090
2091 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2092
2093 lp->d_checksum = dkcksum(lp);
2094 }
2095 /*
2096 * Lookup the provided name in the filesystem. If the file exists,
2097 * is a valid block device, and isn't being used by anyone else,
2098 * set *vpp to the file's vnode.
2099 * You'll find the original of this in ccd.c
2100 */
2101 int
2102 raidlookup(path, p, vpp)
2103 char *path;
2104 struct proc *p;
2105 struct vnode **vpp; /* result */
2106 {
2107 struct nameidata nd;
2108 struct vnode *vp;
2109 struct vattr va;
2110 int error;
2111
2112 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2113 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2114 #if 0
2115 printf("RAIDframe: vn_open returned %d\n", error);
2116 #endif
2117 return (error);
2118 }
2119 vp = nd.ni_vp;
2120 if (vp->v_usecount > 1) {
2121 VOP_UNLOCK(vp, 0);
2122 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2123 return (EBUSY);
2124 }
2125 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2126 VOP_UNLOCK(vp, 0);
2127 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2128 return (error);
2129 }
2130 /* XXX: eventually we should handle VREG, too. */
2131 if (va.va_type != VBLK) {
2132 VOP_UNLOCK(vp, 0);
2133 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2134 return (ENOTBLK);
2135 }
2136 VOP_UNLOCK(vp, 0);
2137 *vpp = vp;
2138 return (0);
2139 }
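/*
 * Illustrative use only (the component path below is hypothetical):
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	error = raidlookup("/dev/sd0e", p, &vp);
 *	if (error)
 *		return (error);
 *
 * On success the vnode is returned unlocked with the device open
 * FREAD|FWRITE; the caller is responsible for the eventual vn_close().
 */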
2140 /*
2141 * Wait interruptibly for an exclusive lock.
2142 *
2143 * XXX
2144 * Several drivers do this; it should be abstracted and made MP-safe.
2145 * (Hmm... where have we seen this warning before :-> GO )
2146 */
2147 static int
2148 raidlock(rs)
2149 struct raid_softc *rs;
2150 {
2151 int error;
2152
2153 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2154 rs->sc_flags |= RAIDF_WANTED;
2155 if ((error =
2156 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2157 return (error);
2158 }
2159 rs->sc_flags |= RAIDF_LOCKED;
2160 return (0);
2161 }
2162 /*
2163 * Unlock and wake up any waiters.
2164 */
2165 static void
2166 raidunlock(rs)
2167 struct raid_softc *rs;
2168 {
2169
2170 rs->sc_flags &= ~RAIDF_LOCKED;
2171 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2172 rs->sc_flags &= ~RAIDF_WANTED;
2173 wakeup(rs);
2174 }
2175 }
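/*
 * Typical usage of the pair above (sketch):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... touch fields of *rs ...
 *	raidunlock(rs);
 *
 * raidlock() can fail only because tsleep() was interrupted (PCATCH).
 */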
2176
2177
2178 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2179 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
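/*
 * The component label lives RF_COMPONENT_INFO_OFFSET bytes (i.e.
 * 16384 / DEV_BSIZE = 32 sectors with the usual 512-byte DEV_BSIZE)
 * into each component, with RF_COMPONENT_INFO_SIZE bytes reserved for
 * it.  The raidread/raidwrite_component_label() routines below access
 * it with raw, synchronous buffer I/O (geteblk + strategy + biowait).
 */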
2180
2181 int
2182 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2183 {
2184 RF_ComponentLabel_t clabel;
2185 raidread_component_label(dev, b_vp, &clabel);
2186 clabel.mod_counter = mod_counter;
2187 clabel.clean = RF_RAID_CLEAN;
2188 raidwrite_component_label(dev, b_vp, &clabel);
2189 return(0);
2190 }
2191
2192
2193 int
2194 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2195 {
2196 RF_ComponentLabel_t clabel;
2197 raidread_component_label(dev, b_vp, &clabel);
2198 clabel.mod_counter = mod_counter;
2199 clabel.clean = RF_RAID_DIRTY;
2200 raidwrite_component_label(dev, b_vp, &clabel);
2201 return(0);
2202 }
2203
2204 /* ARGSUSED */
2205 int
2206 raidread_component_label(dev, b_vp, clabel)
2207 dev_t dev;
2208 struct vnode *b_vp;
2209 RF_ComponentLabel_t *clabel;
2210 {
2211 struct buf *bp;
2212 int error;
2213
2214 /* XXX should probably ensure that we don't try to do this if
2215 someone has changed rf_protected_sectors. */
2216
2217 if (b_vp == NULL) {
2218 /* For whatever reason, this component is not valid.
2219 Don't try to read a component label from it. */
2220 return(EINVAL);
2221 }
2222
2223 /* get a block of the appropriate size... */
2224 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2225 bp->b_dev = dev;
2226
2227 /* get our ducks in a row for the read */
2228 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2229 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2230 bp->b_flags |= B_READ;
2231 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2232
2233 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2234
2235 error = biowait(bp);
2236
2237 if (!error) {
2238 memcpy(clabel, bp->b_data,
2239 sizeof(RF_ComponentLabel_t));
2240 #if 0
2241 rf_print_component_label( clabel );
2242 #endif
2243 } else {
2244 #if 0
2245 printf("Failed to read RAID component label!\n");
2246 #endif
2247 }
2248
2249 brelse(bp);
2250 return(error);
2251 }
2252 /* ARGSUSED */
2253 int
2254 raidwrite_component_label(dev, b_vp, clabel)
2255 dev_t dev;
2256 struct vnode *b_vp;
2257 RF_ComponentLabel_t *clabel;
2258 {
2259 struct buf *bp;
2260 int error;
2261
2262 /* get a block of the appropriate size... */
2263 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2264 bp->b_dev = dev;
2265
2266 /* get our ducks in a row for the write */
2267 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2268 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2269 bp->b_flags |= B_WRITE;
2270 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2271
2272 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2273
2274 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2275
2276 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2277 error = biowait(bp);
2278 brelse(bp);
2279 if (error) {
2280 #if 1
2281 printf("Failed to write RAID component info!\n");
2282 #endif
2283 }
2284
2285 return(error);
2286 }
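/*
 * raidmarkclean() and raidmarkdirty() above are the simplest users of
 * this pair: read the label, update mod_counter and the clean flag,
 * and write it straight back.
 */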
2287
2288 void
2289 rf_markalldirty(raidPtr)
2290 RF_Raid_t *raidPtr;
2291 {
2292 RF_ComponentLabel_t clabel;
2293 int r,c;
2294
2295 raidPtr->mod_counter++;
2296 for (r = 0; r < raidPtr->numRow; r++) {
2297 for (c = 0; c < raidPtr->numCol; c++) {
2298 /* we don't want to touch (at all) a disk that has
2299 failed */
2300 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2301 raidread_component_label(
2302 raidPtr->Disks[r][c].dev,
2303 raidPtr->raid_cinfo[r][c].ci_vp,
2304 &clabel);
2305 if (clabel.status == rf_ds_spared) {
2306 /* XXX do something special...
2307 but whatever you do, don't
2308 try to access it!! */
2309 } else {
2310 #if 0
2311 clabel.status =
2312 raidPtr->Disks[r][c].status;
2313 raidwrite_component_label(
2314 raidPtr->Disks[r][c].dev,
2315 raidPtr->raid_cinfo[r][c].ci_vp,
2316 &clabel);
2317 #endif
2318 raidmarkdirty(
2319 raidPtr->Disks[r][c].dev,
2320 raidPtr->raid_cinfo[r][c].ci_vp,
2321 raidPtr->mod_counter);
2322 }
2323 }
2324 }
2325 }
2326 /* printf("Component labels marked dirty.\n"); */
2327 #if 0
2328 for( c = 0; c < raidPtr->numSpare ; c++) {
2329 sparecol = raidPtr->numCol + c;
2330 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2331 /*
2332
2333 XXX this is where we get fancy and map this spare
2334 			into its correct spot in the array.
2335
2336 */
2337 /*
2338
2339 we claim this disk is "optimal" if it's
2340 rf_ds_used_spare, as that means it should be
2341 directly substitutable for the disk it replaced.
2342 We note that too...
2343
2344 */
2345
2346 for(i=0;i<raidPtr->numRow;i++) {
2347 for(j=0;j<raidPtr->numCol;j++) {
2348 if ((raidPtr->Disks[i][j].spareRow ==
2349 r) &&
2350 (raidPtr->Disks[i][j].spareCol ==
2351 sparecol)) {
2352 srow = r;
2353 scol = sparecol;
2354 break;
2355 }
2356 }
2357 }
2358
2359 raidread_component_label(
2360 raidPtr->Disks[r][sparecol].dev,
2361 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2362 &clabel);
2363 /* make sure status is noted */
2364 clabel.version = RF_COMPONENT_LABEL_VERSION;
2365 clabel.mod_counter = raidPtr->mod_counter;
2366 clabel.serial_number = raidPtr->serial_number;
2367 clabel.row = srow;
2368 clabel.column = scol;
2369 clabel.num_rows = raidPtr->numRow;
2370 clabel.num_columns = raidPtr->numCol;
2371 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2372 clabel.status = rf_ds_optimal;
2373 raidwrite_component_label(
2374 raidPtr->Disks[r][sparecol].dev,
2375 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2376 &clabel);
2377 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2378 				       raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2379 }
2380 }
2381
2382 #endif
2383 }
2384
2385
2386 void
2387 rf_update_component_labels(raidPtr, final)
2388 RF_Raid_t *raidPtr;
2389 int final;
2390 {
2391 RF_ComponentLabel_t clabel;
2392 int sparecol;
2393 int r,c;
2394 int i,j;
2395 int srow, scol;
2396
2397 srow = -1;
2398 scol = -1;
2399
2400 /* XXX should do extra checks to make sure things really are clean,
2401 rather than blindly setting the clean bit... */
2402
2403 raidPtr->mod_counter++;
2404
2405 for (r = 0; r < raidPtr->numRow; r++) {
2406 for (c = 0; c < raidPtr->numCol; c++) {
2407 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2408 raidread_component_label(
2409 raidPtr->Disks[r][c].dev,
2410 raidPtr->raid_cinfo[r][c].ci_vp,
2411 &clabel);
2412 /* make sure status is noted */
2413 clabel.status = rf_ds_optimal;
2414 /* bump the counter */
2415 clabel.mod_counter = raidPtr->mod_counter;
2416
2417 raidwrite_component_label(
2418 raidPtr->Disks[r][c].dev,
2419 raidPtr->raid_cinfo[r][c].ci_vp,
2420 &clabel);
2421 if (final == RF_FINAL_COMPONENT_UPDATE) {
2422 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2423 raidmarkclean(
2424 raidPtr->Disks[r][c].dev,
2425 raidPtr->raid_cinfo[r][c].ci_vp,
2426 raidPtr->mod_counter);
2427 }
2428 }
2429 }
2430 /* else we don't touch it.. */
2431 }
2432 }
2433
2434 for( c = 0; c < raidPtr->numSpare ; c++) {
2435 sparecol = raidPtr->numCol + c;
2436 /* Need to ensure that the reconstruct actually completed! */
2437 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2438 /*
2439
2440 we claim this disk is "optimal" if it's
2441 rf_ds_used_spare, as that means it should be
2442 directly substitutable for the disk it replaced.
2443 We note that too...
2444
2445 */
2446
2447 for(i=0;i<raidPtr->numRow;i++) {
2448 for(j=0;j<raidPtr->numCol;j++) {
2449 if ((raidPtr->Disks[i][j].spareRow ==
2450 0) &&
2451 (raidPtr->Disks[i][j].spareCol ==
2452 sparecol)) {
2453 srow = i;
2454 scol = j;
2455 break;
2456 }
2457 }
2458 }
2459
2460 /* XXX shouldn't *really* need this... */
2461 raidread_component_label(
2462 raidPtr->Disks[0][sparecol].dev,
2463 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2464 &clabel);
2465 /* make sure status is noted */
2466
2467 raid_init_component_label(raidPtr, &clabel);
2468
2469 clabel.mod_counter = raidPtr->mod_counter;
2470 clabel.row = srow;
2471 clabel.column = scol;
2472 clabel.status = rf_ds_optimal;
2473
2474 raidwrite_component_label(
2475 raidPtr->Disks[0][sparecol].dev,
2476 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2477 &clabel);
2478 if (final == RF_FINAL_COMPONENT_UPDATE) {
2479 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2480 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2481 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2482 raidPtr->mod_counter);
2483 }
2484 }
2485 }
2486 }
2487 /* printf("Component labels updated\n"); */
2488 }
2489
2490 void
2491 rf_close_component(raidPtr, vp, auto_configured)
2492 RF_Raid_t *raidPtr;
2493 struct vnode *vp;
2494 int auto_configured;
2495 {
2496 struct proc *p;
2497
2498 p = raidPtr->engine_thread;
2499
2500 if (vp != NULL) {
2501 if (auto_configured == 1) {
2502 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2503 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2504 vput(vp);
2505
2506 } else {
2507 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2508 }
2509 } else {
2510 #if 0
2511 printf("vnode was NULL\n");
2512 #endif
2513 }
2514 }
2515
2516
2517 void
2518 rf_UnconfigureVnodes(raidPtr)
2519 RF_Raid_t *raidPtr;
2520 {
2521 int r,c;
2522 struct proc *p;
2523 struct vnode *vp;
2524 int acd;
2525
2526
2527 /* We take this opportunity to close the vnodes like we should.. */
2528
2529 p = raidPtr->engine_thread;
2530
2531 for (r = 0; r < raidPtr->numRow; r++) {
2532 for (c = 0; c < raidPtr->numCol; c++) {
2533 #if 0
2534 printf("raid%d: Closing vnode for row: %d col: %d\n",
2535 raidPtr->raidid, r, c);
2536 #endif
2537 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2538 acd = raidPtr->Disks[r][c].auto_configured;
2539 rf_close_component(raidPtr, vp, acd);
2540 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2541 raidPtr->Disks[r][c].auto_configured = 0;
2542 }
2543 }
2544 for (r = 0; r < raidPtr->numSpare; r++) {
2545 #if 0
2546 printf("raid%d: Closing vnode for spare: %d\n",
2547 raidPtr->raidid, r);
2548 #endif
2549 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2550 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2551 rf_close_component(raidPtr, vp, acd);
2552 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2553 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2554 }
2555 }
2556
2557
2558 void
2559 rf_ReconThread(req)
2560 struct rf_recon_req *req;
2561 {
2562 int s;
2563 RF_Raid_t *raidPtr;
2564
2565 s = splbio();
2566 raidPtr = (RF_Raid_t *) req->raidPtr;
2567 raidPtr->recon_in_progress = 1;
2568
2569 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2570 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2571
2572 /* XXX get rid of this! we don't need it at all.. */
2573 RF_Free(req, sizeof(*req));
2574
2575 raidPtr->recon_in_progress = 0;
2576 splx(s);
2577
2578 /* That's all... */
2579 kthread_exit(0); /* does not return */
2580 }
2581
2582 void
2583 rf_RewriteParityThread(raidPtr)
2584 RF_Raid_t *raidPtr;
2585 {
2586 int retcode;
2587 int s;
2588
2589 raidPtr->parity_rewrite_in_progress = 1;
2590 s = splbio();
2591 retcode = rf_RewriteParity(raidPtr);
2592 splx(s);
2593 if (retcode) {
2594 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2595 } else {
2596 /* set the clean bit! If we shutdown correctly,
2597 the clean bit on each component label will get
2598 set */
2599 raidPtr->parity_good = RF_RAID_CLEAN;
2600 }
2601 raidPtr->parity_rewrite_in_progress = 0;
2602
2603 /* Anyone waiting for us to stop? If so, inform them... */
2604 if (raidPtr->waitShutdown) {
2605 wakeup(&raidPtr->parity_rewrite_in_progress);
2606 }
2607
2608 /* That's all... */
2609 kthread_exit(0); /* does not return */
2610 }
2611
2612
2613 void
2614 rf_CopybackThread(raidPtr)
2615 RF_Raid_t *raidPtr;
2616 {
2617 int s;
2618
2619 raidPtr->copyback_in_progress = 1;
2620 s = splbio();
2621 rf_CopybackReconstructedData(raidPtr);
2622 splx(s);
2623 raidPtr->copyback_in_progress = 0;
2624
2625 /* That's all... */
2626 kthread_exit(0); /* does not return */
2627 }
2628
2629
2630 void
2631 rf_ReconstructInPlaceThread(req)
2632 struct rf_recon_req *req;
2633 {
2634 int retcode;
2635 int s;
2636 RF_Raid_t *raidPtr;
2637
2638 s = splbio();
2639 raidPtr = req->raidPtr;
2640 raidPtr->recon_in_progress = 1;
2641 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2642 RF_Free(req, sizeof(*req));
2643 raidPtr->recon_in_progress = 0;
2644 splx(s);
2645
2646 /* That's all... */
2647 kthread_exit(0); /* does not return */
2648 }
2649
2650 void
2651 rf_mountroot_hook(dev)
2652 struct device *dev;
2653 {
2654
2655 }
2656
2657
2658 RF_AutoConfig_t *
2659 rf_find_raid_components()
2660 {
2661 struct devnametobdevmaj *dtobdm;
2662 struct vnode *vp;
2663 struct disklabel label;
2664 struct device *dv;
2665 char *cd_name;
2666 dev_t dev;
2667 int error;
2668 int i;
2669 int good_one;
2670 RF_ComponentLabel_t *clabel;
2671 RF_AutoConfig_t *ac_list;
2672 RF_AutoConfig_t *ac;
2673
2674
2675 /* initialize the AutoConfig list */
2676 ac_list = NULL;
2677
2678 /* we begin by trolling through *all* the devices on the system */
2679
2680 for (dv = alldevs.tqh_first; dv != NULL;
2681 dv = dv->dv_list.tqe_next) {
2682
2683 /* we are only interested in disks... */
2684 if (dv->dv_class != DV_DISK)
2685 continue;
2686
2687 /* we don't care about floppies... */
2688 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2689 continue;
2690 }
2691 /* hdfd is the Atari/Hades floppy driver */
2692 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2693 continue;
2694 }
2695 /* fdisa is the Atari/Milan floppy driver */
2696 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2697 continue;
2698 }
2699
2700 /* need to find the device_name_to_block_device_major stuff */
2701 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2702 dtobdm = dev_name2blk;
2703 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2704 dtobdm++;
2705 }
2706
2707 /* get a vnode for the raw partition of this disk */
2708
2709 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2710 if (bdevvp(dev, &vp))
2711 panic("RAID can't alloc vnode");
2712
2713 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2714
2715 if (error) {
2716 /* "Who cares." Continue looking
2717 			   for something that exists. */
2718 vput(vp);
2719 continue;
2720 }
2721
2722 /* Ok, the disk exists. Go get the disklabel. */
2723 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2724 FREAD, NOCRED, 0);
2725 if (error) {
2726 /*
2727 * XXX can't happen - open() would
2728 * have errored out (or faked up one)
2729 */
2730 printf("can't get label for dev %s%c (%d)!?!?\n",
2731 dv->dv_xname, 'a' + RAW_PART, error);
2732 }
2733
2734 /* don't need this any more. We'll allocate it again
2735 a little later if we really do... */
2736 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2737 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2738 vput(vp);
2739
2740 for (i=0; i < label.d_npartitions; i++) {
2741 /* We only support partitions marked as RAID */
2742 if (label.d_partitions[i].p_fstype != FS_RAID)
2743 continue;
2744
2745 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2746 if (bdevvp(dev, &vp))
2747 panic("RAID can't alloc vnode");
2748
2749 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2750 if (error) {
2751 /* Whatever... */
2752 vput(vp);
2753 continue;
2754 }
2755
2756 good_one = 0;
2757
2758 clabel = (RF_ComponentLabel_t *)
2759 malloc(sizeof(RF_ComponentLabel_t),
2760 M_RAIDFRAME, M_NOWAIT);
2761 if (clabel == NULL) {
2762 /* XXX CLEANUP HERE */
2763 printf("RAID auto config: out of memory!\n");
2764 return(NULL); /* XXX probably should panic? */
2765 }
2766
2767 if (!raidread_component_label(dev, vp, clabel)) {
2768 /* Got the label. Does it look reasonable? */
2769 if (rf_reasonable_label(clabel) &&
2770 (clabel->partitionSize <=
2771 label.d_partitions[i].p_size)) {
2772 #if DEBUG
2773 printf("Component on: %s%c: %d\n",
2774 dv->dv_xname, 'a'+i,
2775 label.d_partitions[i].p_size);
2776 rf_print_component_label(clabel);
2777 #endif
2778 /* if it's reasonable, add it,
2779 else ignore it. */
2780 ac = (RF_AutoConfig_t *)
2781 malloc(sizeof(RF_AutoConfig_t),
2782 M_RAIDFRAME,
2783 M_NOWAIT);
2784 if (ac == NULL) {
2785 /* XXX should panic?? */
2786 return(NULL);
2787 }
2788
2789 sprintf(ac->devname, "%s%c",
2790 dv->dv_xname, 'a'+i);
2791 ac->dev = dev;
2792 ac->vp = vp;
2793 ac->clabel = clabel;
2794 ac->next = ac_list;
2795 ac_list = ac;
2796 good_one = 1;
2797 }
2798 }
2799 if (!good_one) {
2800 /* cleanup */
2801 free(clabel, M_RAIDFRAME);
2802 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2803 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2804 vput(vp);
2805 }
2806 }
2807 }
2808 return(ac_list);
2809 }
2810
2811 static int
2812 rf_reasonable_label(clabel)
2813 RF_ComponentLabel_t *clabel;
2814 {
2815
2816 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2817 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2818 ((clabel->clean == RF_RAID_CLEAN) ||
2819 (clabel->clean == RF_RAID_DIRTY)) &&
2820 clabel->row >=0 &&
2821 clabel->column >= 0 &&
2822 clabel->num_rows > 0 &&
2823 clabel->num_columns > 0 &&
2824 clabel->row < clabel->num_rows &&
2825 clabel->column < clabel->num_columns &&
2826 clabel->blockSize > 0 &&
2827 clabel->numBlocks > 0) {
2828 /* label looks reasonable enough... */
2829 return(1);
2830 }
2831 return(0);
2832 }
2833
2834
2835 void
2836 rf_print_component_label(clabel)
2837 RF_ComponentLabel_t *clabel;
2838 {
2839 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2840 clabel->row, clabel->column,
2841 clabel->num_rows, clabel->num_columns);
2842 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2843 clabel->version, clabel->serial_number,
2844 clabel->mod_counter);
2845 printf(" Clean: %s Status: %d\n",
2846 clabel->clean ? "Yes" : "No", clabel->status );
2847 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2848 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2849 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2850 (char) clabel->parityConfig, clabel->blockSize,
2851 clabel->numBlocks);
2852 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2853 printf(" Contains root partition: %s\n",
2854 clabel->root_partition ? "Yes" : "No" );
2855 printf(" Last configured as: raid%d\n", clabel->last_unit );
2856 #if 0
2857 printf(" Config order: %d\n", clabel->config_order);
2858 #endif
2859
2860 }
2861
2862 RF_ConfigSet_t *
2863 rf_create_auto_sets(ac_list)
2864 RF_AutoConfig_t *ac_list;
2865 {
2866 RF_AutoConfig_t *ac;
2867 RF_ConfigSet_t *config_sets;
2868 RF_ConfigSet_t *cset;
2869 RF_AutoConfig_t *ac_next;
2870
2871
2872 config_sets = NULL;
2873
2874 /* Go through the AutoConfig list, and figure out which components
2875 belong to what sets. */
2876 ac = ac_list;
2877 while(ac!=NULL) {
2878 /* we're going to putz with ac->next, so save it here
2879 for use at the end of the loop */
2880 ac_next = ac->next;
2881
2882 if (config_sets == NULL) {
2883 /* will need at least this one... */
2884 config_sets = (RF_ConfigSet_t *)
2885 malloc(sizeof(RF_ConfigSet_t),
2886 M_RAIDFRAME, M_NOWAIT);
2887 if (config_sets == NULL) {
2888 panic("rf_create_auto_sets: No memory!\n");
2889 }
2890 /* this one is easy :) */
2891 config_sets->ac = ac;
2892 config_sets->next = NULL;
2893 config_sets->rootable = 0;
2894 ac->next = NULL;
2895 } else {
2896 /* which set does this component fit into? */
2897 cset = config_sets;
2898 while(cset!=NULL) {
2899 if (rf_does_it_fit(cset, ac)) {
2900 /* looks like it matches... */
2901 ac->next = cset->ac;
2902 cset->ac = ac;
2903 break;
2904 }
2905 cset = cset->next;
2906 }
2907 if (cset==NULL) {
2908 /* didn't find a match above... new set..*/
2909 cset = (RF_ConfigSet_t *)
2910 malloc(sizeof(RF_ConfigSet_t),
2911 M_RAIDFRAME, M_NOWAIT);
2912 if (cset == NULL) {
2913 panic("rf_create_auto_sets: No memory!\n");
2914 }
2915 cset->ac = ac;
2916 ac->next = NULL;
2917 cset->next = config_sets;
2918 cset->rootable = 0;
2919 config_sets = cset;
2920 }
2921 }
2922 ac = ac_next;
2923 }
2924
2925
2926 return(config_sets);
2927 }
2928
2929 static int
2930 rf_does_it_fit(cset, ac)
2931 RF_ConfigSet_t *cset;
2932 RF_AutoConfig_t *ac;
2933 {
2934 RF_ComponentLabel_t *clabel1, *clabel2;
2935
2936 /* If this one matches the *first* one in the set, that's good
2937 enough, since the other members of the set would have been
2938 through here too... */
2939 /* note that we are not checking partitionSize here..
2940
2941 Note that we are also not checking the mod_counters here.
2942 	  If everything else matches except the mod_counter, that's
2943 good enough for this test. We will deal with the mod_counters
2944 a little later in the autoconfiguration process.
2945
2946 (clabel1->mod_counter == clabel2->mod_counter) &&
2947
2948 The reason we don't check for this is that failed disks
2949 will have lower modification counts. If those disks are
2950 not added to the set they used to belong to, then they will
2951 form their own set, which may result in 2 different sets,
2952 for example, competing to be configured at raid0, and
2953 perhaps competing to be the root filesystem set. If the
2954 wrong ones get configured, or both attempt to become /,
2955 	  weird behaviour and/or serious lossage will occur. Thus we
2956 need to bring them into the fold here, and kick them out at
2957 a later point.
2958
2959 */
2960
2961 clabel1 = cset->ac->clabel;
2962 clabel2 = ac->clabel;
2963 if ((clabel1->version == clabel2->version) &&
2964 (clabel1->serial_number == clabel2->serial_number) &&
2965 (clabel1->num_rows == clabel2->num_rows) &&
2966 (clabel1->num_columns == clabel2->num_columns) &&
2967 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2968 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2969 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2970 (clabel1->parityConfig == clabel2->parityConfig) &&
2971 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2972 (clabel1->blockSize == clabel2->blockSize) &&
2973 (clabel1->numBlocks == clabel2->numBlocks) &&
2974 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2975 (clabel1->root_partition == clabel2->root_partition) &&
2976 (clabel1->last_unit == clabel2->last_unit) &&
2977 (clabel1->config_order == clabel2->config_order)) {
2978 		/* if it gets here, it almost *has* to be a match */
2979 } else {
2980 /* it's not consistent with somebody in the set..
2981 punt */
2982 return(0);
2983 }
2984 /* all was fine.. it must fit... */
2985 return(1);
2986 }
2987
2988 int
2989 rf_have_enough_components(cset)
2990 RF_ConfigSet_t *cset;
2991 {
2992 RF_AutoConfig_t *ac;
2993 RF_AutoConfig_t *auto_config;
2994 RF_ComponentLabel_t *clabel;
2995 int r,c;
2996 int num_rows;
2997 int num_cols;
2998 int num_missing;
2999 int mod_counter;
3000 int mod_counter_found;
3001 int even_pair_failed;
3002 char parity_type;
3003
3004
3005 /* check to see that we have enough 'live' components
3006 of this set. If so, we can configure it if necessary */
3007
3008 num_rows = cset->ac->clabel->num_rows;
3009 num_cols = cset->ac->clabel->num_columns;
3010 parity_type = cset->ac->clabel->parityConfig;
3011
3012 /* XXX Check for duplicate components!?!?!? */
3013
3014 /* Determine what the mod_counter is supposed to be for this set. */
3015
3016 mod_counter_found = 0;
3017 mod_counter = 0;
3018 ac = cset->ac;
3019 while(ac!=NULL) {
3020 if (mod_counter_found==0) {
3021 mod_counter = ac->clabel->mod_counter;
3022 mod_counter_found = 1;
3023 } else {
3024 if (ac->clabel->mod_counter > mod_counter) {
3025 mod_counter = ac->clabel->mod_counter;
3026 }
3027 }
3028 ac = ac->next;
3029 }
3030
3031 num_missing = 0;
3032 auto_config = cset->ac;
3033
3034 for(r=0; r<num_rows; r++) {
3035 even_pair_failed = 0;
3036 for(c=0; c<num_cols; c++) {
3037 ac = auto_config;
3038 while(ac!=NULL) {
3039 if ((ac->clabel->row == r) &&
3040 (ac->clabel->column == c) &&
3041 (ac->clabel->mod_counter == mod_counter)) {
3042 /* it's this one... */
3043 #if DEBUG
3044 printf("Found: %s at %d,%d\n",
3045 ac->devname,r,c);
3046 #endif
3047 break;
3048 }
3049 ac=ac->next;
3050 }
3051 if (ac==NULL) {
3052 /* Didn't find one here! */
3053 /* special case for RAID 1, especially
3054 where there are more than 2
3055 components (where RAIDframe treats
3056 things a little differently :( ) */
3057 if (parity_type == '1') {
3058 if (c%2 == 0) { /* even component */
3059 even_pair_failed = 1;
3060 } else { /* odd component. If
3061 we're failed, and
3062 so is the even
3063 component, it's
3064 "Good Night, Charlie" */
3065 if (even_pair_failed == 1) {
3066 return(0);
3067 }
3068 }
3069 } else {
3070 /* normal accounting */
3071 num_missing++;
3072 }
3073 }
3074 if ((parity_type == '1') && (c%2 == 1)) {
3075 				/* Just finished the odd half of a mirror
3076 				   pair without bailing.. reset the
3077 				   even_pair_failed flag, and go on to the next pair.... */
3078 even_pair_failed = 0;
3079 }
3080 }
3081 }
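	/*
	 * For RAID 1 the loop above only gives up when both halves of a
	 * mirror pair are missing: e.g. in a hypothetical 4-component set,
	 * components (0,1) and (2,3) form pairs, and losing one member of
	 * each pair is still survivable.
	 */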
3082
3083 clabel = cset->ac->clabel;
3084
3085 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3086 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3087 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3088 /* XXX this needs to be made *much* more general */
3089 /* Too many failures */
3090 return(0);
3091 }
3092 /* otherwise, all is well, and we've got enough to take a kick
3093 at autoconfiguring this set */
3094 return(1);
3095 }
3096
3097 void
3098 rf_create_configuration(ac,config,raidPtr)
3099 RF_AutoConfig_t *ac;
3100 RF_Config_t *config;
3101 RF_Raid_t *raidPtr;
3102 {
3103 RF_ComponentLabel_t *clabel;
3104 int i;
3105
3106 clabel = ac->clabel;
3107
3108 /* 1. Fill in the common stuff */
3109 config->numRow = clabel->num_rows;
3110 config->numCol = clabel->num_columns;
3111 config->numSpare = 0; /* XXX should this be set here? */
3112 config->sectPerSU = clabel->sectPerSU;
3113 config->SUsPerPU = clabel->SUsPerPU;
3114 config->SUsPerRU = clabel->SUsPerRU;
3115 config->parityConfig = clabel->parityConfig;
3116 /* XXX... */
3117 strcpy(config->diskQueueType,"fifo");
3118 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3119 config->layoutSpecificSize = 0; /* XXX ?? */
3120
3121 while(ac!=NULL) {
3122 /* row/col values will be in range due to the checks
3123 		   in rf_reasonable_label() */
3124 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3125 ac->devname);
3126 ac = ac->next;
3127 }
3128
3129 for(i=0;i<RF_MAXDBGV;i++) {
3130 		config->debugVars[i][0] = '\0';
3131 }
3132 }
3133
3134 int
3135 rf_set_autoconfig(raidPtr, new_value)
3136 RF_Raid_t *raidPtr;
3137 int new_value;
3138 {
3139 RF_ComponentLabel_t clabel;
3140 struct vnode *vp;
3141 dev_t dev;
3142 int row, column;
3143
3144 raidPtr->autoconfigure = new_value;
3145 for(row=0; row<raidPtr->numRow; row++) {
3146 for(column=0; column<raidPtr->numCol; column++) {
3147 if (raidPtr->Disks[row][column].status ==
3148 rf_ds_optimal) {
3149 dev = raidPtr->Disks[row][column].dev;
3150 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3151 raidread_component_label(dev, vp, &clabel);
3152 clabel.autoconfigure = new_value;
3153 raidwrite_component_label(dev, vp, &clabel);
3154 }
3155 }
3156 }
3157 return(new_value);
3158 }
3159
3160 int
3161 rf_set_rootpartition(raidPtr, new_value)
3162 RF_Raid_t *raidPtr;
3163 int new_value;
3164 {
3165 RF_ComponentLabel_t clabel;
3166 struct vnode *vp;
3167 dev_t dev;
3168 int row, column;
3169
3170 raidPtr->root_partition = new_value;
3171 for(row=0; row<raidPtr->numRow; row++) {
3172 for(column=0; column<raidPtr->numCol; column++) {
3173 if (raidPtr->Disks[row][column].status ==
3174 rf_ds_optimal) {
3175 dev = raidPtr->Disks[row][column].dev;
3176 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3177 raidread_component_label(dev, vp, &clabel);
3178 clabel.root_partition = new_value;
3179 raidwrite_component_label(dev, vp, &clabel);
3180 }
3181 }
3182 }
3183 return(new_value);
3184 }
3185
3186 void
3187 rf_release_all_vps(cset)
3188 RF_ConfigSet_t *cset;
3189 {
3190 RF_AutoConfig_t *ac;
3191
3192 ac = cset->ac;
3193 while(ac!=NULL) {
3194 /* Close the vp, and give it back */
3195 if (ac->vp) {
3196 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3197 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3198 vput(ac->vp);
3199 ac->vp = NULL;
3200 }
3201 ac = ac->next;
3202 }
3203 }
3204
3205
3206 void
3207 rf_cleanup_config_set(cset)
3208 RF_ConfigSet_t *cset;
3209 {
3210 RF_AutoConfig_t *ac;
3211 RF_AutoConfig_t *next_ac;
3212
3213 ac = cset->ac;
3214 while(ac!=NULL) {
3215 next_ac = ac->next;
3216 /* nuke the label */
3217 free(ac->clabel, M_RAIDFRAME);
3218 /* cleanup the config structure */
3219 free(ac, M_RAIDFRAME);
3220 /* "next.." */
3221 ac = next_ac;
3222 }
3223 /* and, finally, nuke the config set */
3224 free(cset, M_RAIDFRAME);
3225 }
3226
3227
3228 void
3229 raid_init_component_label(raidPtr, clabel)
3230 RF_Raid_t *raidPtr;
3231 RF_ComponentLabel_t *clabel;
3232 {
3233 /* current version number */
3234 clabel->version = RF_COMPONENT_LABEL_VERSION;
3235 clabel->serial_number = raidPtr->serial_number;
3236 clabel->mod_counter = raidPtr->mod_counter;
3237 clabel->num_rows = raidPtr->numRow;
3238 clabel->num_columns = raidPtr->numCol;
3239 clabel->clean = RF_RAID_DIRTY; /* not clean */
3240 clabel->status = rf_ds_optimal; /* "It's good!" */
3241
3242 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3243 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3244 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3245
3246 clabel->blockSize = raidPtr->bytesPerSector;
3247 clabel->numBlocks = raidPtr->sectorsPerDisk;
3248
3249 /* XXX not portable */
3250 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3251 clabel->maxOutstanding = raidPtr->maxOutstanding;
3252 clabel->autoconfigure = raidPtr->autoconfigure;
3253 clabel->root_partition = raidPtr->root_partition;
3254 clabel->last_unit = raidPtr->raidid;
3255 clabel->config_order = raidPtr->config_order;
3256 }
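/*
 * Callers typically adjust a few fields after this: e.g. the spare
 * handling in rf_update_component_labels() above overrides row, column,
 * mod_counter and status once the label has been initialized here.
 */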
3257
3258 int
3259 rf_auto_config_set(cset,unit)
3260 RF_ConfigSet_t *cset;
3261 int *unit;
3262 {
3263 RF_Raid_t *raidPtr;
3264 RF_Config_t *config;
3265 int raidID;
3266 int retcode;
3267
3268 #if DEBUG
3269 printf("RAID autoconfigure\n");
3270 #endif
3271
3272 retcode = 0;
3273 *unit = -1;
3274
3275 /* 1. Create a config structure */
3276
3277 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3278 M_RAIDFRAME,
3279 M_NOWAIT);
3280 if (config==NULL) {
3281 printf("Out of mem!?!?\n");
3282 /* XXX do something more intelligent here. */
3283 return(1);
3284 }
3285
3286 memset(config, 0, sizeof(RF_Config_t));
3287
3288 /* XXX raidID needs to be set correctly.. */
3289
3290 /*
3291 2. Figure out what RAID ID this one is supposed to live at
3292 See if we can get the same RAID dev that it was configured
3293 on last time..
3294 */
3295
3296 raidID = cset->ac->clabel->last_unit;
3297 if ((raidID < 0) || (raidID >= numraid)) {
3298 /* let's not wander off into lala land. */
3299 raidID = numraid - 1;
3300 }
3301 if (raidPtrs[raidID]->valid != 0) {
3302
3303 /*
3304 Nope... Go looking for an alternative...
3305 Start high so we don't immediately use raid0 if that's
3306 not taken.
3307 */
3308
3309 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3310 if (raidPtrs[raidID]->valid == 0) {
3311 /* can use this one! */
3312 break;
3313 }
3314 }
3315 }
3316
3317 if (raidID < 0) {
3318 /* punt... */
3319 printf("Unable to auto configure this set!\n");
3320 printf("(Out of RAID devs!)\n");
3321 return(1);
3322 }
3323
3324 #if DEBUG
3325 printf("Configuring raid%d:\n",raidID);
3326 #endif
3327
3328 raidPtr = raidPtrs[raidID];
3329
3330 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3331 raidPtr->raidid = raidID;
3332 raidPtr->openings = RAIDOUTSTANDING;
3333
3334 /* 3. Build the configuration structure */
3335 rf_create_configuration(cset->ac, config, raidPtr);
3336
3337 /* 4. Do the configuration */
3338 retcode = rf_Configure(raidPtr, config, cset->ac);
3339
3340 if (retcode == 0) {
3341
3342 raidinit(raidPtrs[raidID]);
3343
3344 rf_markalldirty(raidPtrs[raidID]);
3345 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3346 if (cset->ac->clabel->root_partition==1) {
3347 /* everything configured just fine. Make a note
3348 that this set is eligible to be root. */
3349 cset->rootable = 1;
3350 /* XXX do this here? */
3351 raidPtrs[raidID]->root_partition = 1;
3352 }
3353 }
3354
3355 /* 5. Cleanup */
3356 free(config, M_RAIDFRAME);
3357
3358 *unit = raidID;
3359 return(retcode);
3360 }
3361
3362 void
3363 rf_disk_unbusy(desc)
3364 RF_RaidAccessDesc_t *desc;
3365 {
3366 struct buf *bp;
3367
3368 bp = (struct buf *)desc->bp;
3369 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3370 (bp->b_bcount - bp->b_resid));
3371 }
3372