/*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.17 2002/11/11 22:11:57 nathanw Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
/***********************************************************
 *
 * rf_netbsdkintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.17 2002/11/11 22:11:57 nathanw Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/proc.h>
123 #include <sys/queue.h>
124 #include <sys/disk.h>
125 #include <sys/device.h>
126 #include <sys/stat.h>
127 #include <sys/ioctl.h>
128 #include <sys/fcntl.h>
129 #include <sys/systm.h>
130 #include <sys/namei.h>
131 #include <sys/vnode.h>
132 #include <sys/disklabel.h>
133 #include <sys/conf.h>
134 #include <sys/lock.h>
135 #include <sys/buf.h>
136 #include <sys/user.h>
137 #include <sys/reboot.h>
138
139 #include <dev/raidframe/raidframevar.h>
140 #include <dev/raidframe/raidframeio.h>
141 #include "raid.h"
142 #include "opt_raid_autoconfig.h"
143 #include "rf_raid.h"
144 #include "rf_copyback.h"
145 #include "rf_dag.h"
146 #include "rf_dagflags.h"
147 #include "rf_desc.h"
148 #include "rf_diskqueue.h"
149 #include "rf_etimer.h"
150 #include "rf_general.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_threadstuff.h"
156
157 #ifdef DEBUG
158 int rf_kdebug_level = 0;
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit(RF_Raid_t *);
181
182 void raidattach(int);
183
184 dev_type_open(raidopen);
185 dev_type_close(raidclose);
186 dev_type_read(raidread);
187 dev_type_write(raidwrite);
188 dev_type_ioctl(raidioctl);
189 dev_type_strategy(raidstrategy);
190 dev_type_dump(raiddump);
191 dev_type_size(raidsize);
192
193 const struct bdevsw raid_bdevsw = {
194 raidopen, raidclose, raidstrategy, raidioctl,
195 raiddump, raidsize, D_DISK
196 };
197
198 const struct cdevsw raid_cdevsw = {
199 raidopen, raidclose, raidread, raidwrite, raidioctl,
200 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
201 };
202
203 /*
204 * Pilfered from ccd.c
205 */
206
/*
 * Per-component-I/O wrapper: the component transfer is issued on rf_buf
 * while rf_obp remembers the original buf it was carved from.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
212
213 /* component buffer pool */
214 struct pool raidframe_cbufpool;
215
216 /* XXX Not sure if the following should be replacing the raidPtrs above,
217 or if it should be used in conjunction with that...
218 */
219
/*
 * Per-unit software state, one entry per "raid" unit.  The RAIDframe
 * core descriptor for the same unit lives in raidPtrs[unit].
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_*, below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
234
235 #define raidunit(x) DISKUNIT(x)
236 int numraid = 0;
237
238 /*
239 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
240 * Be aware that large numbers can allow the driver to consume a lot of
241 * kernel memory, especially on writes, and in degraded mode reads.
242 *
243 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
244 * a single 64K write will typically require 64K for the old data,
245 * 64K for the old parity, and 64K for the new parity, for a total
246 * of 192K (if the parity buffer is not re-used immediately).
247 * Even it if is used immediately, that's still 128K, which when multiplied
248 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
249 *
250 * Now in degraded mode, for example, a 64K read on the above setup may
251 * require data reconstruction, which will require *all* of the 4 remaining
252 * disks to participate -- 4 * 32K/disk == 128K again.
253 */
254
255 #ifndef RAIDOUTSTANDING
256 #define RAIDOUTSTANDING 6
257 #endif
258
259 #define RAIDLABELDEV(dev) \
260 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
261
262 /* declared here, and made public, for the benefit of KVM stuff.. */
263 struct raid_softc *raid_softc;
264
265 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
266 struct disklabel *);
267 static void raidgetdisklabel(dev_t);
268 static void raidmakedisklabel(struct raid_softc *);
269
270 static int raidlock(struct raid_softc *);
271 static void raidunlock(struct raid_softc *);
272
273 static void rf_markalldirty(RF_Raid_t *);
274
275 struct device *raidrootdev;
276
277 void rf_ReconThread(struct rf_recon_req *);
278 /* XXX what I want is: */
279 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
280 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
281 void rf_CopybackThread(RF_Raid_t *raidPtr);
282 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
283 int rf_autoconfig(struct device *self);
284 void rf_buildroothack(RF_ConfigSet_t *);
285
286 RF_AutoConfig_t *rf_find_raid_components(void);
287 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
288 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
289 static int rf_reasonable_label(RF_ComponentLabel_t *);
290 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
291 int rf_set_autoconfig(RF_Raid_t *, int);
292 int rf_set_rootpartition(RF_Raid_t *, int);
293 void rf_release_all_vps(RF_ConfigSet_t *);
294 void rf_cleanup_config_set(RF_ConfigSet_t *);
295 int rf_have_enough_components(RF_ConfigSet_t *);
296 int rf_auto_config_set(RF_ConfigSet_t *, int *);
297
298 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
299 allow autoconfig to take place.
300 Note that this is overridden by having
301 RAID_AUTOCONFIG as an option in the
302 kernel config file. */
303
/*
 * raidattach -- pseudo-device attach routine, called once at boot with
 * the number of RAID units configured into the kernel.
 *
 * Allocates and zeroes the per-unit RAIDframe descriptors (raidPtrs[]),
 * the driver softc array (raid_softc[]) and the fake root devices
 * (raidrootdev[]), initializes the component buffer pool, boots the
 * RAIDframe core, and registers rf_autoconfig() as a config finalizer
 * so auto-configuration runs after all real hardware is found.
 */
void
raidattach(num)
	int num;
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		/* NOTE(review): sprintf into fixed-size dv_xname; "raid%d"
		   fits for any int unit, but snprintf would be safer. */
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Out of memory part-way through: shrink numraid so
			   only the fully initialized units are usable. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
400
401 int
402 rf_autoconfig(struct device *self)
403 {
404 RF_AutoConfig_t *ac_list;
405 RF_ConfigSet_t *config_sets;
406
407 if (raidautoconfig == 0)
408 return (0);
409
410 /* XXX This code can only be run once. */
411 raidautoconfig = 0;
412
413 /* 1. locate all RAID components on the system */
414 #ifdef DEBUG
415 printf("Searching for RAID components...\n");
416 #endif
417 ac_list = rf_find_raid_components();
418
419 /* 2. Sort them into their respective sets. */
420 config_sets = rf_create_auto_sets(ac_list);
421
422 /*
423 * 3. Evaluate each set andconfigure the valid ones.
424 * This gets done in rf_buildroothack().
425 */
426 rf_buildroothack(config_sets);
427
428 return (1);
429 }
430
431 void
432 rf_buildroothack(RF_ConfigSet_t *config_sets)
433 {
434 RF_ConfigSet_t *cset;
435 RF_ConfigSet_t *next_cset;
436 int retcode;
437 int raidID;
438 int rootID;
439 int num_root;
440
441 rootID = 0;
442 num_root = 0;
443 cset = config_sets;
444 while(cset != NULL ) {
445 next_cset = cset->next;
446 if (rf_have_enough_components(cset) &&
447 cset->ac->clabel->autoconfigure==1) {
448 retcode = rf_auto_config_set(cset,&raidID);
449 if (!retcode) {
450 if (cset->rootable) {
451 rootID = raidID;
452 num_root++;
453 }
454 } else {
455 /* The autoconfig didn't work :( */
456 #if DEBUG
457 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
458 #endif
459 rf_release_all_vps(cset);
460 }
461 } else {
462 /* we're not autoconfiguring this set...
463 release the associated resources */
464 rf_release_all_vps(cset);
465 }
466 /* cleanup */
467 rf_cleanup_config_set(cset);
468 cset = next_cset;
469 }
470
471 /* we found something bootable... */
472
473 if (num_root == 1) {
474 booted_device = &raidrootdev[rootID];
475 } else if (num_root > 1) {
476 /* we can't guess.. require the user to answer... */
477 boothowto |= RB_ASKNAME;
478 }
479 }
480
481
482 int
483 raidsize(dev)
484 dev_t dev;
485 {
486 struct raid_softc *rs;
487 struct disklabel *lp;
488 int part, unit, omask, size;
489
490 unit = raidunit(dev);
491 if (unit >= numraid)
492 return (-1);
493 rs = &raid_softc[unit];
494
495 if ((rs->sc_flags & RAIDF_INITED) == 0)
496 return (-1);
497
498 part = DISKPART(dev);
499 omask = rs->sc_dkdev.dk_openmask & (1 << part);
500 lp = rs->sc_dkdev.dk_label;
501
502 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
503 return (-1);
504
505 if (lp->d_partitions[part].p_fstype != FS_SWAP)
506 size = -1;
507 else
508 size = lp->d_partitions[part].p_size *
509 (lp->d_secsize / DEV_BSIZE);
510
511 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
512 return (-1);
513
514 return (size);
515
516 }
517
/*
 * raiddump -- crash-dump entry point.  Dumping to a RAID set is not
 * supported; always fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
/*
 * raidopen -- open one partition of a raid unit.
 *
 * Takes the per-unit lock, re-reads the disklabel on the first open of
 * an initialized unit, verifies the requested partition exists, and
 * records the open in the char/block open masks.  On the very first
 * open of a configured set the component labels are marked dirty so an
 * unclean shutdown can be detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));

	/* First open of a configured unit: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}

	/* The combined open mask is derived from the char/block masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);

}
/* ARGSUSED */
/*
 * raidclose -- close one partition of a raid unit.
 *
 * Clears the partition from the open masks; on the last close of a
 * configured unit the component labels get a final update, and if the
 * system is shutting down the RAID set is shut down and its disk
 * detached as well.
 *
 * NOTE(review): any error from rf_Shutdown() is stored in 'error' but
 * discarded -- this function always returns 0.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
#if 0
		printf("Last one on raid%d.  Updating status.\n",unit);
#endif
		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
669
670 void
671 raidstrategy(bp)
672 struct buf *bp;
673 {
674 int s;
675
676 unsigned int raidID = raidunit(bp->b_dev);
677 RF_Raid_t *raidPtr;
678 struct raid_softc *rs = &raid_softc[raidID];
679 struct disklabel *lp;
680 int wlabel;
681
682 if ((rs->sc_flags & RAIDF_INITED) ==0) {
683 bp->b_error = ENXIO;
684 bp->b_flags |= B_ERROR;
685 bp->b_resid = bp->b_bcount;
686 biodone(bp);
687 return;
688 }
689 if (raidID >= numraid || !raidPtrs[raidID]) {
690 bp->b_error = ENODEV;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 raidPtr = raidPtrs[raidID];
697 if (!raidPtr->valid) {
698 bp->b_error = ENODEV;
699 bp->b_flags |= B_ERROR;
700 bp->b_resid = bp->b_bcount;
701 biodone(bp);
702 return;
703 }
704 if (bp->b_bcount == 0) {
705 db1_printf(("b_bcount is zero..\n"));
706 biodone(bp);
707 return;
708 }
709 lp = rs->sc_dkdev.dk_label;
710
711 /*
712 * Do bounds checking and adjust transfer. If there's an
713 * error, the bounds check will flag that for us.
714 */
715
716 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
717 if (DISKPART(bp->b_dev) != RAW_PART)
718 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
719 db1_printf(("Bounds check failed!!:%d %d\n",
720 (int) bp->b_blkno, (int) wlabel));
721 biodone(bp);
722 return;
723 }
724 s = splbio();
725
726 bp->b_resid = 0;
727
728 /* stuff it onto our queue */
729 BUFQ_PUT(&rs->buf_queue, bp);
730
731 raidstart(raidPtrs[raidID]);
732
733 splx(s);
734 }
735 /* ARGSUSED */
736 int
737 raidread(dev, uio, flags)
738 dev_t dev;
739 struct uio *uio;
740 int flags;
741 {
742 int unit = raidunit(dev);
743 struct raid_softc *rs;
744 int part;
745
746 if (unit >= numraid)
747 return (ENXIO);
748 rs = &raid_softc[unit];
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752 part = DISKPART(dev);
753
754 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
755
756 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
757
758 }
759 /* ARGSUSED */
760 int
761 raidwrite(dev, uio, flags)
762 dev_t dev;
763 struct uio *uio;
764 int flags;
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775 db1_printf(("raidwrite\n"));
776 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
777
778 }
779
780 int
781 raidioctl(dev, cmd, data, flag, p)
782 dev_t dev;
783 u_long cmd;
784 caddr_t data;
785 int flag;
786 struct proc *p;
787 {
788 int unit = raidunit(dev);
789 int error = 0;
790 int part, pmask;
791 struct raid_softc *rs;
792 RF_Config_t *k_cfg, *u_cfg;
793 RF_Raid_t *raidPtr;
794 RF_RaidDisk_t *diskPtr;
795 RF_AccTotals_t *totals;
796 RF_DeviceConfig_t *d_cfg, **ucfgp;
797 u_char *specific_buf;
798 int retcode = 0;
799 int row;
800 int column;
801 int raidid;
802 struct rf_recon_req *rrcopy, *rr;
803 RF_ComponentLabel_t *clabel;
804 RF_ComponentLabel_t ci_label;
805 RF_ComponentLabel_t **clabel_ptr;
806 RF_SingleComponent_t *sparePtr,*componentPtr;
807 RF_SingleComponent_t hot_spare;
808 RF_SingleComponent_t component;
809 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
810 int i, j, d;
811 #ifdef __HAVE_OLD_DISKLABEL
812 struct disklabel newlabel;
813 #endif
814
815 if (unit >= numraid)
816 return (ENXIO);
817 rs = &raid_softc[unit];
818 raidPtr = raidPtrs[unit];
819
820 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
821 (int) DISKPART(dev), (int) unit, (int) cmd));
822
823 /* Must be open for writes for these commands... */
824 switch (cmd) {
825 case DIOCSDINFO:
826 case DIOCWDINFO:
827 #ifdef __HAVE_OLD_DISKLABEL
828 case ODIOCWDINFO:
829 case ODIOCSDINFO:
830 #endif
831 case DIOCWLABEL:
832 if ((flag & FWRITE) == 0)
833 return (EBADF);
834 }
835
836 /* Must be initialized for these... */
837 switch (cmd) {
838 case DIOCGDINFO:
839 case DIOCSDINFO:
840 case DIOCWDINFO:
841 #ifdef __HAVE_OLD_DISKLABEL
842 case ODIOCGDINFO:
843 case ODIOCWDINFO:
844 case ODIOCSDINFO:
845 case ODIOCGDEFLABEL:
846 #endif
847 case DIOCGPART:
848 case DIOCWLABEL:
849 case DIOCGDEFLABEL:
850 case RAIDFRAME_SHUTDOWN:
851 case RAIDFRAME_REWRITEPARITY:
852 case RAIDFRAME_GET_INFO:
853 case RAIDFRAME_RESET_ACCTOTALS:
854 case RAIDFRAME_GET_ACCTOTALS:
855 case RAIDFRAME_KEEP_ACCTOTALS:
856 case RAIDFRAME_GET_SIZE:
857 case RAIDFRAME_FAIL_DISK:
858 case RAIDFRAME_COPYBACK:
859 case RAIDFRAME_CHECK_RECON_STATUS:
860 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
861 case RAIDFRAME_GET_COMPONENT_LABEL:
862 case RAIDFRAME_SET_COMPONENT_LABEL:
863 case RAIDFRAME_ADD_HOT_SPARE:
864 case RAIDFRAME_REMOVE_HOT_SPARE:
865 case RAIDFRAME_INIT_LABELS:
866 case RAIDFRAME_REBUILD_IN_PLACE:
867 case RAIDFRAME_CHECK_PARITY:
868 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
869 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
870 case RAIDFRAME_CHECK_COPYBACK_STATUS:
871 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
872 case RAIDFRAME_SET_AUTOCONFIG:
873 case RAIDFRAME_SET_ROOT:
874 case RAIDFRAME_DELETE_COMPONENT:
875 case RAIDFRAME_INCORPORATE_HOT_SPARE:
876 if ((rs->sc_flags & RAIDF_INITED) == 0)
877 return (ENXIO);
878 }
879
880 switch (cmd) {
881
882 /* configure the system */
883 case RAIDFRAME_CONFIGURE:
884
885 if (raidPtr->valid) {
886 /* There is a valid RAID set running on this unit! */
887 printf("raid%d: Device already configured!\n",unit);
888 return(EINVAL);
889 }
890
891 /* copy-in the configuration information */
892 /* data points to a pointer to the configuration structure */
893
894 u_cfg = *((RF_Config_t **) data);
895 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
896 if (k_cfg == NULL) {
897 return (ENOMEM);
898 }
899 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
900 sizeof(RF_Config_t));
901 if (retcode) {
902 RF_Free(k_cfg, sizeof(RF_Config_t));
903 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
904 retcode));
905 return (retcode);
906 }
907 /* allocate a buffer for the layout-specific data, and copy it
908 * in */
909 if (k_cfg->layoutSpecificSize) {
910 if (k_cfg->layoutSpecificSize > 10000) {
911 /* sanity check */
912 RF_Free(k_cfg, sizeof(RF_Config_t));
913 return (EINVAL);
914 }
915 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
916 (u_char *));
917 if (specific_buf == NULL) {
918 RF_Free(k_cfg, sizeof(RF_Config_t));
919 return (ENOMEM);
920 }
921 retcode = copyin(k_cfg->layoutSpecific,
922 (caddr_t) specific_buf,
923 k_cfg->layoutSpecificSize);
924 if (retcode) {
925 RF_Free(k_cfg, sizeof(RF_Config_t));
926 RF_Free(specific_buf,
927 k_cfg->layoutSpecificSize);
928 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
929 retcode));
930 return (retcode);
931 }
932 } else
933 specific_buf = NULL;
934 k_cfg->layoutSpecific = specific_buf;
935
936 /* should do some kind of sanity check on the configuration.
937 * Store the sum of all the bytes in the last byte? */
938
939 /* configure the system */
940
941 /*
942 * Clear the entire RAID descriptor, just to make sure
943 * there is no stale data left in the case of a
944 * reconfiguration
945 */
946 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
947 raidPtr->raidid = unit;
948
949 retcode = rf_Configure(raidPtr, k_cfg, NULL);
950
951 if (retcode == 0) {
952
953 /* allow this many simultaneous IO's to
954 this RAID device */
955 raidPtr->openings = RAIDOUTSTANDING;
956
957 raidinit(raidPtr);
958 rf_markalldirty(raidPtr);
959 }
960 /* free the buffers. No return code here. */
961 if (k_cfg->layoutSpecificSize) {
962 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
963 }
964 RF_Free(k_cfg, sizeof(RF_Config_t));
965
966 return (retcode);
967
968 /* shutdown the system */
969 case RAIDFRAME_SHUTDOWN:
970
971 if ((error = raidlock(rs)) != 0)
972 return (error);
973
974 /*
975 * If somebody has a partition mounted, we shouldn't
976 * shutdown.
977 */
978
979 part = DISKPART(dev);
980 pmask = (1 << part);
981 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
982 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
983 (rs->sc_dkdev.dk_copenmask & pmask))) {
984 raidunlock(rs);
985 return (EBUSY);
986 }
987
988 retcode = rf_Shutdown(raidPtr);
989
990 /* It's no longer initialized... */
991 rs->sc_flags &= ~RAIDF_INITED;
992
993 /* Detach the disk. */
994 disk_detach(&rs->sc_dkdev);
995
996 raidunlock(rs);
997
998 return (retcode);
999 case RAIDFRAME_GET_COMPONENT_LABEL:
1000 clabel_ptr = (RF_ComponentLabel_t **) data;
1001 /* need to read the component label for the disk indicated
1002 by row,column in clabel */
1003
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
1006 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1007 (RF_ComponentLabel_t *));
1008 if (clabel == NULL)
1009 return (ENOMEM);
1010
1011 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1012
1013 retcode = copyin( *clabel_ptr, clabel,
1014 sizeof(RF_ComponentLabel_t));
1015
1016 if (retcode) {
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return(retcode);
1019 }
1020
1021 row = clabel->row;
1022 column = clabel->column;
1023
1024 if ((row < 0) || (row >= raidPtr->numRow) ||
1025 (column < 0) || (column >= raidPtr->numCol +
1026 raidPtr->numSpare)) {
1027 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1028 return(EINVAL);
1029 }
1030
1031 raidread_component_label(raidPtr->Disks[row][column].dev,
1032 raidPtr->raid_cinfo[row][column].ci_vp,
1033 clabel );
1034
1035 retcode = copyout((caddr_t) clabel,
1036 (caddr_t) *clabel_ptr,
1037 sizeof(RF_ComponentLabel_t));
1038 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1039 return (retcode);
1040
1041 case RAIDFRAME_SET_COMPONENT_LABEL:
1042 clabel = (RF_ComponentLabel_t *) data;
1043
1044 /* XXX check the label for valid stuff... */
1045 /* Note that some things *should not* get modified --
1046 the user should be re-initing the labels instead of
1047 trying to patch things.
1048 */
1049
1050 raidid = raidPtr->raidid;
1051 printf("raid%d: Got component label:\n", raidid);
1052 printf("raid%d: Version: %d\n", raidid, clabel->version);
1053 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1054 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1055 printf("raid%d: Row: %d\n", raidid, clabel->row);
1056 printf("raid%d: Column: %d\n", raidid, clabel->column);
1057 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1058 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1059 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1060 printf("raid%d: Status: %d\n", raidid, clabel->status);
1061
1062 row = clabel->row;
1063 column = clabel->column;
1064
1065 if ((row < 0) || (row >= raidPtr->numRow) ||
1066 (column < 0) || (column >= raidPtr->numCol)) {
1067 return(EINVAL);
1068 }
1069
1070 /* XXX this isn't allowed to do anything for now :-) */
1071
1072 /* XXX and before it is, we need to fill in the rest
1073 of the fields!?!?!?! */
1074 #if 0
1075 raidwrite_component_label(
1076 raidPtr->Disks[row][column].dev,
1077 raidPtr->raid_cinfo[row][column].ci_vp,
1078 clabel );
1079 #endif
1080 return (0);
1081
1082 case RAIDFRAME_INIT_LABELS:
1083 clabel = (RF_ComponentLabel_t *) data;
1084 /*
1085 we only want the serial number from
1086 the above. We get all the rest of the information
1087 from the config that was used to create this RAID
1088 set.
1089 */
1090
1091 raidPtr->serial_number = clabel->serial_number;
1092
1093 raid_init_component_label(raidPtr, &ci_label);
1094 ci_label.serial_number = clabel->serial_number;
1095
1096 for(row=0;row<raidPtr->numRow;row++) {
1097 ci_label.row = row;
1098 for(column=0;column<raidPtr->numCol;column++) {
1099 diskPtr = &raidPtr->Disks[row][column];
1100 if (!RF_DEAD_DISK(diskPtr->status)) {
1101 ci_label.partitionSize = diskPtr->partitionSize;
1102 ci_label.column = column;
1103 raidwrite_component_label(
1104 raidPtr->Disks[row][column].dev,
1105 raidPtr->raid_cinfo[row][column].ci_vp,
1106 &ci_label );
1107 }
1108 }
1109 }
1110
1111 return (retcode);
1112 case RAIDFRAME_SET_AUTOCONFIG:
1113 d = rf_set_autoconfig(raidPtr, *(int *) data);
1114 printf("raid%d: New autoconfig value is: %d\n",
1115 raidPtr->raidid, d);
1116 *(int *) data = d;
1117 return (retcode);
1118
1119 case RAIDFRAME_SET_ROOT:
1120 d = rf_set_rootpartition(raidPtr, *(int *) data);
1121 printf("raid%d: New rootpartition value is: %d\n",
1122 raidPtr->raidid, d);
1123 *(int *) data = d;
1124 return (retcode);
1125
1126 /* initialize all parity */
1127 case RAIDFRAME_REWRITEPARITY:
1128
1129 if (raidPtr->Layout.map->faultsTolerated == 0) {
1130 /* Parity for RAID 0 is trivially correct */
1131 raidPtr->parity_good = RF_RAID_CLEAN;
1132 return(0);
1133 }
1134
1135 if (raidPtr->parity_rewrite_in_progress == 1) {
1136 /* Re-write is already in progress! */
1137 return(EINVAL);
1138 }
1139
1140 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1141 rf_RewriteParityThread,
1142 raidPtr,"raid_parity");
1143 return (retcode);
1144
1145
1146 case RAIDFRAME_ADD_HOT_SPARE:
1147 sparePtr = (RF_SingleComponent_t *) data;
1148 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1149 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1150 return(retcode);
1151
1152 case RAIDFRAME_REMOVE_HOT_SPARE:
1153 return(retcode);
1154
1155 case RAIDFRAME_DELETE_COMPONENT:
1156 componentPtr = (RF_SingleComponent_t *)data;
1157 memcpy( &component, componentPtr,
1158 sizeof(RF_SingleComponent_t));
1159 retcode = rf_delete_component(raidPtr, &component);
1160 return(retcode);
1161
1162 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1163 componentPtr = (RF_SingleComponent_t *)data;
1164 memcpy( &component, componentPtr,
1165 sizeof(RF_SingleComponent_t));
1166 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1167 return(retcode);
1168
1169 case RAIDFRAME_REBUILD_IN_PLACE:
1170
1171 if (raidPtr->Layout.map->faultsTolerated == 0) {
1172 /* Can't do this on a RAID 0!! */
1173 return(EINVAL);
1174 }
1175
1176 if (raidPtr->recon_in_progress == 1) {
1177 /* a reconstruct is already in progress! */
1178 return(EINVAL);
1179 }
1180
1181 componentPtr = (RF_SingleComponent_t *) data;
1182 memcpy( &component, componentPtr,
1183 sizeof(RF_SingleComponent_t));
1184 row = component.row;
1185 column = component.column;
1186 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1187 row, column);
1188 if ((row < 0) || (row >= raidPtr->numRow) ||
1189 (column < 0) || (column >= raidPtr->numCol)) {
1190 return(EINVAL);
1191 }
1192
1193 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1194 if (rrcopy == NULL)
1195 return(ENOMEM);
1196
1197 rrcopy->raidPtr = (void *) raidPtr;
1198 rrcopy->row = row;
1199 rrcopy->col = column;
1200
1201 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1202 rf_ReconstructInPlaceThread,
1203 rrcopy,"raid_reconip");
1204 return(retcode);
1205
1206 case RAIDFRAME_GET_INFO:
1207 if (!raidPtr->valid)
1208 return (ENODEV);
1209 ucfgp = (RF_DeviceConfig_t **) data;
1210 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1211 (RF_DeviceConfig_t *));
1212 if (d_cfg == NULL)
1213 return (ENOMEM);
1214 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1215 d_cfg->rows = raidPtr->numRow;
1216 d_cfg->cols = raidPtr->numCol;
1217 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1218 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1219 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1220 return (ENOMEM);
1221 }
1222 d_cfg->nspares = raidPtr->numSpare;
1223 if (d_cfg->nspares >= RF_MAX_DISKS) {
1224 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1225 return (ENOMEM);
1226 }
1227 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1228 d = 0;
1229 for (i = 0; i < d_cfg->rows; i++) {
1230 for (j = 0; j < d_cfg->cols; j++) {
1231 d_cfg->devs[d] = raidPtr->Disks[i][j];
1232 d++;
1233 }
1234 }
1235 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1236 d_cfg->spares[i] = raidPtr->Disks[0][j];
1237 }
1238 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1239 sizeof(RF_DeviceConfig_t));
1240 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1241
1242 return (retcode);
1243
1244 case RAIDFRAME_CHECK_PARITY:
1245 *(int *) data = raidPtr->parity_good;
1246 return (0);
1247
1248 case RAIDFRAME_RESET_ACCTOTALS:
1249 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1250 return (0);
1251
1252 case RAIDFRAME_GET_ACCTOTALS:
1253 totals = (RF_AccTotals_t *) data;
1254 *totals = raidPtr->acc_totals;
1255 return (0);
1256
1257 case RAIDFRAME_KEEP_ACCTOTALS:
1258 raidPtr->keep_acc_totals = *(int *)data;
1259 return (0);
1260
1261 case RAIDFRAME_GET_SIZE:
1262 *(int *) data = raidPtr->totalSectors;
1263 return (0);
1264
1265 /* fail a disk & optionally start reconstruction */
1266 case RAIDFRAME_FAIL_DISK:
1267
1268 if (raidPtr->Layout.map->faultsTolerated == 0) {
1269 /* Can't do this on a RAID 0!! */
1270 return(EINVAL);
1271 }
1272
1273 rr = (struct rf_recon_req *) data;
1274
1275 if (rr->row < 0 || rr->row >= raidPtr->numRow
1276 || rr->col < 0 || rr->col >= raidPtr->numCol)
1277 return (EINVAL);
1278
1279 printf("raid%d: Failing the disk: row: %d col: %d\n",
1280 unit, rr->row, rr->col);
1281
1282 /* make a copy of the recon request so that we don't rely on
1283 * the user's buffer */
1284 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1285 if (rrcopy == NULL)
1286 return(ENOMEM);
1287 memcpy(rrcopy, rr, sizeof(*rr));
1288 rrcopy->raidPtr = (void *) raidPtr;
1289
1290 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1291 rf_ReconThread,
1292 rrcopy,"raid_recon");
1293 return (0);
1294
1295 /* invoke a copyback operation after recon on whatever disk
1296 * needs it, if any */
1297 case RAIDFRAME_COPYBACK:
1298
1299 if (raidPtr->Layout.map->faultsTolerated == 0) {
1300 /* This makes no sense on a RAID 0!! */
1301 return(EINVAL);
1302 }
1303
1304 if (raidPtr->copyback_in_progress == 1) {
1305 /* Copyback is already in progress! */
1306 return(EINVAL);
1307 }
1308
1309 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1310 rf_CopybackThread,
1311 raidPtr,"raid_copyback");
1312 return (retcode);
1313
1314 /* return the percentage completion of reconstruction */
1315 case RAIDFRAME_CHECK_RECON_STATUS:
1316 if (raidPtr->Layout.map->faultsTolerated == 0) {
1317 /* This makes no sense on a RAID 0, so tell the
1318 user it's done. */
1319 *(int *) data = 100;
1320 return(0);
1321 }
1322 row = 0; /* XXX we only consider a single row... */
1323 if (raidPtr->status[row] != rf_rs_reconstructing)
1324 *(int *) data = 100;
1325 else
1326 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1327 return (0);
1328 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1329 progressInfoPtr = (RF_ProgressInfo_t **) data;
1330 row = 0; /* XXX we only consider a single row... */
1331 if (raidPtr->status[row] != rf_rs_reconstructing) {
1332 progressInfo.remaining = 0;
1333 progressInfo.completed = 100;
1334 progressInfo.total = 100;
1335 } else {
1336 progressInfo.total =
1337 raidPtr->reconControl[row]->numRUsTotal;
1338 progressInfo.completed =
1339 raidPtr->reconControl[row]->numRUsComplete;
1340 progressInfo.remaining = progressInfo.total -
1341 progressInfo.completed;
1342 }
1343 retcode = copyout((caddr_t) &progressInfo,
1344 (caddr_t) *progressInfoPtr,
1345 sizeof(RF_ProgressInfo_t));
1346 return (retcode);
1347
1348 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1349 if (raidPtr->Layout.map->faultsTolerated == 0) {
1350 /* This makes no sense on a RAID 0, so tell the
1351 user it's done. */
1352 *(int *) data = 100;
1353 return(0);
1354 }
1355 if (raidPtr->parity_rewrite_in_progress == 1) {
1356 *(int *) data = 100 *
1357 raidPtr->parity_rewrite_stripes_done /
1358 raidPtr->Layout.numStripe;
1359 } else {
1360 *(int *) data = 100;
1361 }
1362 return (0);
1363
1364 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1365 progressInfoPtr = (RF_ProgressInfo_t **) data;
1366 if (raidPtr->parity_rewrite_in_progress == 1) {
1367 progressInfo.total = raidPtr->Layout.numStripe;
1368 progressInfo.completed =
1369 raidPtr->parity_rewrite_stripes_done;
1370 progressInfo.remaining = progressInfo.total -
1371 progressInfo.completed;
1372 } else {
1373 progressInfo.remaining = 0;
1374 progressInfo.completed = 100;
1375 progressInfo.total = 100;
1376 }
1377 retcode = copyout((caddr_t) &progressInfo,
1378 (caddr_t) *progressInfoPtr,
1379 sizeof(RF_ProgressInfo_t));
1380 return (retcode);
1381
1382 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1383 if (raidPtr->Layout.map->faultsTolerated == 0) {
1384 /* This makes no sense on a RAID 0 */
1385 *(int *) data = 100;
1386 return(0);
1387 }
1388 if (raidPtr->copyback_in_progress == 1) {
1389 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1390 raidPtr->Layout.numStripe;
1391 } else {
1392 *(int *) data = 100;
1393 }
1394 return (0);
1395
1396 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1397 progressInfoPtr = (RF_ProgressInfo_t **) data;
1398 if (raidPtr->copyback_in_progress == 1) {
1399 progressInfo.total = raidPtr->Layout.numStripe;
1400 progressInfo.completed =
1401 raidPtr->copyback_stripes_done;
1402 progressInfo.remaining = progressInfo.total -
1403 progressInfo.completed;
1404 } else {
1405 progressInfo.remaining = 0;
1406 progressInfo.completed = 100;
1407 progressInfo.total = 100;
1408 }
1409 retcode = copyout((caddr_t) &progressInfo,
1410 (caddr_t) *progressInfoPtr,
1411 sizeof(RF_ProgressInfo_t));
1412 return (retcode);
1413
1414 /* the sparetable daemon calls this to wait for the kernel to
1415 * need a spare table. this ioctl does not return until a
1416 * spare table is needed. XXX -- calling mpsleep here in the
1417 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1418 * -- I should either compute the spare table in the kernel,
1419 * or have a different -- XXX XXX -- interface (a different
1420 * character device) for delivering the table -- XXX */
1421 #if 0
1422 case RAIDFRAME_SPARET_WAIT:
1423 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1424 while (!rf_sparet_wait_queue)
1425 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1426 waitreq = rf_sparet_wait_queue;
1427 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1428 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1429
1430 /* structure assignment */
1431 *((RF_SparetWait_t *) data) = *waitreq;
1432
1433 RF_Free(waitreq, sizeof(*waitreq));
1434 return (0);
1435
1436 /* wakes up a process waiting on SPARET_WAIT and puts an error
 * code in it that will cause the daemon to exit */
1438 case RAIDFRAME_ABORT_SPARET_WAIT:
1439 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1440 waitreq->fcol = -1;
1441 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1442 waitreq->next = rf_sparet_wait_queue;
1443 rf_sparet_wait_queue = waitreq;
1444 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1445 wakeup(&rf_sparet_wait_queue);
1446 return (0);
1447
1448 /* used by the spare table daemon to deliver a spare table
1449 * into the kernel */
1450 case RAIDFRAME_SEND_SPARET:
1451
1452 /* install the spare table */
1453 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1454
1455 /* respond to the requestor. the return status of the spare
1456 * table installation is passed in the "fcol" field */
1457 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1458 waitreq->fcol = retcode;
1459 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1460 waitreq->next = rf_sparet_resp_queue;
1461 rf_sparet_resp_queue = waitreq;
1462 wakeup(&rf_sparet_resp_queue);
1463 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1464
1465 return (retcode);
1466 #endif
1467
1468 default:
1469 break; /* fall through to the os-specific code below */
1470
1471 }
1472
1473 if (!raidPtr->valid)
1474 return (EINVAL);
1475
1476 /*
1477 * Add support for "regular" device ioctls here.
1478 */
1479
1480 switch (cmd) {
1481 case DIOCGDINFO:
1482 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1483 break;
1484 #ifdef __HAVE_OLD_DISKLABEL
1485 case ODIOCGDINFO:
1486 newlabel = *(rs->sc_dkdev.dk_label);
1487 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1488 return ENOTTY;
1489 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1490 break;
1491 #endif
1492
1493 case DIOCGPART:
1494 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1495 ((struct partinfo *) data)->part =
1496 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1497 break;
1498
1499 case DIOCWDINFO:
1500 case DIOCSDINFO:
1501 #ifdef __HAVE_OLD_DISKLABEL
1502 case ODIOCWDINFO:
1503 case ODIOCSDINFO:
1504 #endif
1505 {
1506 struct disklabel *lp;
1507 #ifdef __HAVE_OLD_DISKLABEL
1508 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1509 memset(&newlabel, 0, sizeof newlabel);
1510 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1511 lp = &newlabel;
1512 } else
1513 #endif
1514 lp = (struct disklabel *)data;
1515
1516 if ((error = raidlock(rs)) != 0)
1517 return (error);
1518
1519 rs->sc_flags |= RAIDF_LABELLING;
1520
1521 error = setdisklabel(rs->sc_dkdev.dk_label,
1522 lp, 0, rs->sc_dkdev.dk_cpulabel);
1523 if (error == 0) {
1524 if (cmd == DIOCWDINFO
1525 #ifdef __HAVE_OLD_DISKLABEL
1526 || cmd == ODIOCWDINFO
1527 #endif
1528 )
1529 error = writedisklabel(RAIDLABELDEV(dev),
1530 raidstrategy, rs->sc_dkdev.dk_label,
1531 rs->sc_dkdev.dk_cpulabel);
1532 }
1533 rs->sc_flags &= ~RAIDF_LABELLING;
1534
1535 raidunlock(rs);
1536
1537 if (error)
1538 return (error);
1539 break;
1540 }
1541
1542 case DIOCWLABEL:
1543 if (*(int *) data != 0)
1544 rs->sc_flags |= RAIDF_WLABEL;
1545 else
1546 rs->sc_flags &= ~RAIDF_WLABEL;
1547 break;
1548
1549 case DIOCGDEFLABEL:
1550 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1551 break;
1552
1553 #ifdef __HAVE_OLD_DISKLABEL
1554 case ODIOCGDEFLABEL:
1555 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1556 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1557 return ENOTTY;
1558 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1559 break;
1560 #endif
1561
1562 default:
1563 retcode = ENOTTY;
1564 }
1565 return (retcode);
1566
1567 }
1568
1569
1570 /* raidinit -- complete the rest of the initialization for the
1571 RAIDframe device. */
1572
1573
1574 static void
1575 raidinit(raidPtr)
1576 RF_Raid_t *raidPtr;
1577 {
1578 struct raid_softc *rs;
1579 int unit;
1580
1581 unit = raidPtr->raidid;
1582
1583 rs = &raid_softc[unit];
1584
1585 /* XXX should check return code first... */
1586 rs->sc_flags |= RAIDF_INITED;
1587
1588 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1589
1590 rs->sc_dkdev.dk_name = rs->sc_xname;
1591
1592 /* disk_attach actually creates space for the CPU disklabel, among
1593 * other things, so it's critical to call this *BEFORE* we try putzing
1594 * with disklabels. */
1595
1596 disk_attach(&rs->sc_dkdev);
1597
1598 /* XXX There may be a weird interaction here between this, and
1599 * protectedSectors, as used in RAIDframe. */
1600
1601 rs->sc_size = raidPtr->totalSectors;
1602
1603 }
1604
1605 /* wake up the daemon & tell it to get us a spare table
1606 * XXX
1607 * the entries in the queues should be tagged with the raidPtr
1608 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1610 * XXX
1611 *
1612 * XXX This code is not currently used. GO
1613 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Hand the request to the user-land daemon: push it onto the
	 * wait queue and wake whoever is blocked in
	 * RAIDFRAME_SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above refers to the old mpsleep
	 * interface; tsleep() does not release rf_sparet_wait_mutex, so
	 * this appears to sleep with the mutex held -- confirm. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Detach the daemon's response (queued by RAIDFRAME_SEND_SPARET)
	 * from the head of the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes the installation status back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1639
1640 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1641 * bp & passes it down.
1642 * any calls originating in the kernel must use non-blocking I/O
1643 * do some extra sanity checking to return "appropriate" error values for
1644 * certain conditions (to make some standard utilities work)
1645 *
1646 * Formerly known as: rf_DoAccessKernel
1647 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held when the while condition
	 * is evaluated, dropped for the body, and re-taken at the bottom
	 * of each iteration. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* Whole sectors, plus one more if there is a partial
		 * trailing sector. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run off the end of the device; the
		 * "sum < ..." comparisons also catch arithmetic
		 * wrap-around in the unsigned addition above. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Requests must be an exact multiple of the sector size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one of the available openings for this access. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* NOTE(review): retcode is assigned but never examined;
		 * presumably errors surface through the async completion
		 * path instead -- confirm. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1762
1763
1764
1765
1766 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1767
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* NOTE(review): pool_get() with PR_NOWAIT may return NULL under
	 * memory pressure, and raidbp is dereferenced below without a
	 * check -- confirm whether the pool is guaranteed to be primed
	 * with enough items before I/O can reach this point. */
	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the component buf for the underlying device;
		 * KernelWakeupFunc runs as its b_iodone callback. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter for the
		 * vnode I/O accounting. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1868 /* this is the callback function associated with a I/O invoked from
1869 kernel code.
1870 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int     s;

	/* Runs at biodone time for a component I/O issued by
	 * rf_DispatchKernelIO; vbp is really the embedded raidbuf. */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate any error from the component I/O to the original
	 * buf, defaulting to EIO when no specific error was recorded. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the physical I/O time in the trace record, if any. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* The component buf is finished with; return it to the pool. */
	pool_put(&raidframe_cbufpool, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1951
1952
1953
1954 /*
1955 * initialize a buf structure for doing an I/O in the kernel.
1956 */
1957 static void
1958 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1959 logBytesPerSector, b_proc)
1960 struct buf *bp;
1961 struct vnode *b_vp;
1962 unsigned rw_flag;
1963 dev_t dev;
1964 RF_SectorNum_t startSect;
1965 RF_SectorCount_t numSect;
1966 caddr_t buf;
1967 void (*cbFunc) (struct buf *);
1968 void *cbArg;
1969 int logBytesPerSector;
1970 struct proc *b_proc;
1971 {
1972 /* bp->b_flags = B_PHYS | rw_flag; */
1973 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1974 bp->b_bcount = numSect << logBytesPerSector;
1975 bp->b_bufsize = bp->b_bcount;
1976 bp->b_error = 0;
1977 bp->b_dev = dev;
1978 bp->b_data = buf;
1979 bp->b_blkno = startSect;
1980 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1981 if (bp->b_bcount == 0) {
1982 panic("bp->b_bcount is zero in InitBP!!");
1983 }
1984 bp->b_proc = b_proc;
1985 bp->b_iodone = cbFunc;
1986 bp->b_vp = b_vp;
1987
1988 }
1989
1990 static void
1991 raidgetdefaultlabel(raidPtr, rs, lp)
1992 RF_Raid_t *raidPtr;
1993 struct raid_softc *rs;
1994 struct disklabel *lp;
1995 {
1996 db1_printf(("Building a default label...\n"));
1997 memset(lp, 0, sizeof(*lp));
1998
1999 /* fabricate a label... */
2000 lp->d_secperunit = raidPtr->totalSectors;
2001 lp->d_secsize = raidPtr->bytesPerSector;
2002 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2003 lp->d_ntracks = 4 * raidPtr->numCol;
2004 lp->d_ncylinders = raidPtr->totalSectors /
2005 (lp->d_nsectors * lp->d_ntracks);
2006 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2007
2008 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2009 lp->d_type = DTYPE_RAID;
2010 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2011 lp->d_rpm = 3600;
2012 lp->d_interleave = 1;
2013 lp->d_flags = 0;
2014
2015 lp->d_partitions[RAW_PART].p_offset = 0;
2016 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2017 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2018 lp->d_npartitions = RAW_PART + 1;
2019
2020 lp->d_magic = DISKMAGIC;
2021 lp->d_magic2 = DISKMAGIC;
2022 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2023
2024 }
2025 /*
2026 * Read the disklabel from the raid device. If one is not present, fake one
2027 * up.
2028 */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated label so readdisklabel() below has
	 * sane geometry to work with. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No readable on-disk label: synthesize one. */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and the old disklabel may
		 * be used if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2082 /*
2083 * Take care of things one might want to take care of in the event
2084 * that a disklabel isn't present.
2085 */
2086 static void
2087 raidmakedisklabel(rs)
2088 struct raid_softc *rs;
2089 {
2090 struct disklabel *lp = rs->sc_dkdev.dk_label;
2091 db1_printf(("Making a label..\n"));
2092
2093 /*
2094 * For historical reasons, if there's no disklabel present
2095 * the raw partition must be marked FS_BSDFFS.
2096 */
2097
2098 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2099
2100 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2101
2102 lp->d_checksum = dkcksum(lp);
2103 }
2104 /*
2105 * Lookup the provided name in the filesystem. If the file exists,
2106 * is a valid block device, and isn't being used by anyone else,
2107 * set *vpp to the file's vnode.
2108 * You'll find the original of this in ccd.c
2109 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	/* Translate the pathname and open it for read/write.  On
	   success vn_open() returns with the vnode locked. */
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#if 0
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	/* Refuse a component that is already in use elsewhere; we
	   must unlock before vn_close() on every error path. */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* Success: hand back the open, unlocked vnode.  The caller
	   owns it and is responsible for the eventual vn_close(). */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
2149 /*
2150 * Wait interruptibly for an exclusive lock.
2151 *
2152 * XXX
2153 * Several drivers do this; it should be abstracted and made MP-safe.
2154 * (Hmm... where have we seen this warning before :-> GO )
2155 */
2156 static int
2157 raidlock(rs)
2158 struct raid_softc *rs;
2159 {
2160 int error;
2161
2162 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2163 rs->sc_flags |= RAIDF_WANTED;
2164 if ((error =
2165 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2166 return (error);
2167 }
2168 rs->sc_flags |= RAIDF_LOCKED;
2169 return (0);
2170 }
2171 /*
2172 * Unlock and wake up any waiters.
2173 */
2174 static void
2175 raidunlock(rs)
2176 struct raid_softc *rs;
2177 {
2178
2179 rs->sc_flags &= ~RAIDF_LOCKED;
2180 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2181 rs->sc_flags &= ~RAIDF_WANTED;
2182 wakeup(rs);
2183 }
2184 }
2185
2186
2187 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2188 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2189
2190 int
2191 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2192 {
2193 RF_ComponentLabel_t clabel;
2194 raidread_component_label(dev, b_vp, &clabel);
2195 clabel.mod_counter = mod_counter;
2196 clabel.clean = RF_RAID_CLEAN;
2197 raidwrite_component_label(dev, b_vp, &clabel);
2198 return(0);
2199 }
2200
2201
2202 int
2203 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2204 {
2205 RF_ComponentLabel_t clabel;
2206 raidread_component_label(dev, b_vp, &clabel);
2207 clabel.mod_counter = mod_counter;
2208 clabel.clean = RF_RAID_DIRTY;
2209 raidwrite_component_label(dev, b_vp, &clabel);
2210 return(0);
2211 }
2212
2213 /* ARGSUSED */
2214 int
2215 raidread_component_label(dev, b_vp, clabel)
2216 dev_t dev;
2217 struct vnode *b_vp;
2218 RF_ComponentLabel_t *clabel;
2219 {
2220 struct buf *bp;
2221 const struct bdevsw *bdev;
2222 int error;
2223
2224 /* XXX should probably ensure that we don't try to do this if
2225 someone has changed rf_protected_sectors. */
2226
2227 if (b_vp == NULL) {
2228 /* For whatever reason, this component is not valid.
2229 Don't try to read a component label from it. */
2230 return(EINVAL);
2231 }
2232
2233 /* get a block of the appropriate size... */
2234 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2235 bp->b_dev = dev;
2236
2237 /* get our ducks in a row for the read */
2238 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2239 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2240 bp->b_flags |= B_READ;
2241 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2242
2243 bdev = bdevsw_lookup(bp->b_dev);
2244 if (bdev == NULL)
2245 return (ENXIO);
2246 (*bdev->d_strategy)(bp);
2247
2248 error = biowait(bp);
2249
2250 if (!error) {
2251 memcpy(clabel, bp->b_data,
2252 sizeof(RF_ComponentLabel_t));
2253 #if 0
2254 rf_print_component_label( clabel );
2255 #endif
2256 } else {
2257 #if 0
2258 printf("Failed to read RAID component label!\n");
2259 #endif
2260 }
2261
2262 brelse(bp);
2263 return(error);
2264 }
2265 /* ARGSUSED */
2266 int
2267 raidwrite_component_label(dev, b_vp, clabel)
2268 dev_t dev;
2269 struct vnode *b_vp;
2270 RF_ComponentLabel_t *clabel;
2271 {
2272 struct buf *bp;
2273 const struct bdevsw *bdev;
2274 int error;
2275
2276 /* get a block of the appropriate size... */
2277 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2278 bp->b_dev = dev;
2279
2280 /* get our ducks in a row for the write */
2281 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2282 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2283 bp->b_flags |= B_WRITE;
2284 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2285
2286 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2287
2288 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2289
2290 bdev = bdevsw_lookup(bp->b_dev);
2291 if (bdev == NULL)
2292 return (ENXIO);
2293 (*bdev->d_strategy)(bp);
2294 error = biowait(bp);
2295 brelse(bp);
2296 if (error) {
2297 #if 1
2298 printf("Failed to write RAID component info!\n");
2299 #endif
2300 }
2301
2302 return(error);
2303 }
2304
/*
 * Bump the array's modification counter and mark the component label
 * of every live (non-failed) component dirty.  Done when the set is
 * brought into service so that an unclean shutdown is detectable.
 */
void
rf_markalldirty(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t clabel;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* we don't want to touch (at all) a disk that has
			   failed */
			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&clabel);
				if (clabel.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				clabel.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&clabel);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the block below is dead code -- it references
	   variables (sparecol, srow, scol, i, j) that are not declared
	   in this function, so it would not compile if enabled. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &clabel);
			/* make sure status is noted */
			clabel.version = RF_COMPONENT_LABEL_VERSION;
			clabel.mod_counter = raidPtr->mod_counter;
			clabel.serial_number = raidPtr->serial_number;
			clabel.row = srow;
			clabel.column = scol;
			clabel.num_rows = raidPtr->numRow;
			clabel.num_columns = raidPtr->numCol;
			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
			clabel.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &clabel);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2401
2402
/*
 * Refresh every component label with the current array state: bump
 * the mod counter, mark optimal components optimal, and fold any
 * used spares into the position of the disk they replaced.  If
 * `final' is RF_FINAL_COMPONENT_UPDATE and parity is known good,
 * the labels are additionally marked clean (i.e. clean shutdown).
 */
void
rf_update_component_labels(raidPtr, final)
	RF_Raid_t *raidPtr;
	int final;
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&clabel);
				/* make sure status is noted */
				clabel.status = rf_ds_optimal;
				/* bump the counter */
				clabel.mod_counter = raidPtr->mod_counter;

				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&clabel);
				if (final == RF_FINAL_COMPONENT_UPDATE) {
					if (raidPtr->parity_good == RF_RAID_CLEAN) {
						raidmarkclean(
							raidPtr->Disks[r][c].dev,
							raidPtr->raid_cinfo[r][c].ci_vp,
							raidPtr->mod_counter);
					}
				}
			}
			/* else we don't touch it.. */
		}
	}

	/* Spares live in row 0, in columns past numCol. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which (row,col) this spare stands in for. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			/* ...then override with this spare's identity. */
			clabel.mod_counter = raidPtr->mod_counter;
			clabel.row = srow;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
						      raidPtr->raid_cinfo[0][sparecol].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2506
2507 void
2508 rf_close_component(raidPtr, vp, auto_configured)
2509 RF_Raid_t *raidPtr;
2510 struct vnode *vp;
2511 int auto_configured;
2512 {
2513 struct proc *p;
2514
2515 p = raidPtr->engine_thread;
2516
2517 if (vp != NULL) {
2518 if (auto_configured == 1) {
2519 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2520 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2521 vput(vp);
2522
2523 } else {
2524 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2525 }
2526 } else {
2527 #if 0
2528 printf("vnode was NULL\n");
2529 #endif
2530 }
2531 }
2532
2533
2534 void
2535 rf_UnconfigureVnodes(raidPtr)
2536 RF_Raid_t *raidPtr;
2537 {
2538 int r,c;
2539 struct vnode *vp;
2540 int acd;
2541
2542
2543 /* We take this opportunity to close the vnodes like we should.. */
2544
2545 for (r = 0; r < raidPtr->numRow; r++) {
2546 for (c = 0; c < raidPtr->numCol; c++) {
2547 #if 0
2548 printf("raid%d: Closing vnode for row: %d col: %d\n",
2549 raidPtr->raidid, r, c);
2550 #endif
2551 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2552 acd = raidPtr->Disks[r][c].auto_configured;
2553 rf_close_component(raidPtr, vp, acd);
2554 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2555 raidPtr->Disks[r][c].auto_configured = 0;
2556 }
2557 }
2558 for (r = 0; r < raidPtr->numSpare; r++) {
2559 #if 0
2560 printf("raid%d: Closing vnode for spare: %d\n",
2561 raidPtr->raidid, r);
2562 #endif
2563 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2564 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2565 rf_close_component(raidPtr, vp, acd);
2566 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2567 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2568 }
2569 }
2570
2571
2572 void
2573 rf_ReconThread(req)
2574 struct rf_recon_req *req;
2575 {
2576 int s;
2577 RF_Raid_t *raidPtr;
2578
2579 s = splbio();
2580 raidPtr = (RF_Raid_t *) req->raidPtr;
2581 raidPtr->recon_in_progress = 1;
2582
2583 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2584 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2585
2586 /* XXX get rid of this! we don't need it at all.. */
2587 RF_Free(req, sizeof(*req));
2588
2589 raidPtr->recon_in_progress = 0;
2590 splx(s);
2591
2592 /* That's all... */
2593 kthread_exit(0); /* does not return */
2594 }
2595
2596 void
2597 rf_RewriteParityThread(raidPtr)
2598 RF_Raid_t *raidPtr;
2599 {
2600 int retcode;
2601 int s;
2602
2603 raidPtr->parity_rewrite_in_progress = 1;
2604 s = splbio();
2605 retcode = rf_RewriteParity(raidPtr);
2606 splx(s);
2607 if (retcode) {
2608 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2609 } else {
2610 /* set the clean bit! If we shutdown correctly,
2611 the clean bit on each component label will get
2612 set */
2613 raidPtr->parity_good = RF_RAID_CLEAN;
2614 }
2615 raidPtr->parity_rewrite_in_progress = 0;
2616
2617 /* Anyone waiting for us to stop? If so, inform them... */
2618 if (raidPtr->waitShutdown) {
2619 wakeup(&raidPtr->parity_rewrite_in_progress);
2620 }
2621
2622 /* That's all... */
2623 kthread_exit(0); /* does not return */
2624 }
2625
2626
2627 void
2628 rf_CopybackThread(raidPtr)
2629 RF_Raid_t *raidPtr;
2630 {
2631 int s;
2632
2633 raidPtr->copyback_in_progress = 1;
2634 s = splbio();
2635 rf_CopybackReconstructedData(raidPtr);
2636 splx(s);
2637 raidPtr->copyback_in_progress = 0;
2638
2639 /* That's all... */
2640 kthread_exit(0); /* does not return */
2641 }
2642
2643
2644 void
2645 rf_ReconstructInPlaceThread(req)
2646 struct rf_recon_req *req;
2647 {
2648 int retcode;
2649 int s;
2650 RF_Raid_t *raidPtr;
2651
2652 s = splbio();
2653 raidPtr = req->raidPtr;
2654 raidPtr->recon_in_progress = 1;
2655 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2656 RF_Free(req, sizeof(*req));
2657 raidPtr->recon_in_progress = 0;
2658 splx(s);
2659
2660 /* That's all... */
2661 kthread_exit(0); /* does not return */
2662 }
2663
2664 RF_AutoConfig_t *
2665 rf_find_raid_components()
2666 {
2667 struct vnode *vp;
2668 struct disklabel label;
2669 struct device *dv;
2670 dev_t dev;
2671 int bmajor;
2672 int error;
2673 int i;
2674 int good_one;
2675 RF_ComponentLabel_t *clabel;
2676 RF_AutoConfig_t *ac_list;
2677 RF_AutoConfig_t *ac;
2678
2679
2680 /* initialize the AutoConfig list */
2681 ac_list = NULL;
2682
2683 /* we begin by trolling through *all* the devices on the system */
2684
2685 for (dv = alldevs.tqh_first; dv != NULL;
2686 dv = dv->dv_list.tqe_next) {
2687
2688 /* we are only interested in disks... */
2689 if (dv->dv_class != DV_DISK)
2690 continue;
2691
2692 /* we don't care about floppies... */
2693 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2694 continue;
2695 }
2696
2697 /* we don't care about CD's... */
2698 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2699 continue;
2700 }
2701
2702 /* hdfd is the Atari/Hades floppy driver */
2703 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2704 continue;
2705 }
2706 /* fdisa is the Atari/Milan floppy driver */
2707 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2708 continue;
2709 }
2710
2711 /* need to find the device_name_to_block_device_major stuff */
2712 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2713
2714 /* get a vnode for the raw partition of this disk */
2715
2716 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2717 if (bdevvp(dev, &vp))
2718 panic("RAID can't alloc vnode");
2719
2720 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2721
2722 if (error) {
2723 /* "Who cares." Continue looking
2724 for something that exists*/
2725 vput(vp);
2726 continue;
2727 }
2728
2729 /* Ok, the disk exists. Go get the disklabel. */
2730 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2731 FREAD, NOCRED, 0);
2732 if (error) {
2733 /*
2734 * XXX can't happen - open() would
2735 * have errored out (or faked up one)
2736 */
2737 printf("can't get label for dev %s%c (%d)!?!?\n",
2738 dv->dv_xname, 'a' + RAW_PART, error);
2739 }
2740
2741 /* don't need this any more. We'll allocate it again
2742 a little later if we really do... */
2743 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2744 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2745 vput(vp);
2746
2747 for (i=0; i < label.d_npartitions; i++) {
2748 /* We only support partitions marked as RAID */
2749 if (label.d_partitions[i].p_fstype != FS_RAID)
2750 continue;
2751
2752 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2753 if (bdevvp(dev, &vp))
2754 panic("RAID can't alloc vnode");
2755
2756 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2757 if (error) {
2758 /* Whatever... */
2759 vput(vp);
2760 continue;
2761 }
2762
2763 good_one = 0;
2764
2765 clabel = (RF_ComponentLabel_t *)
2766 malloc(sizeof(RF_ComponentLabel_t),
2767 M_RAIDFRAME, M_NOWAIT);
2768 if (clabel == NULL) {
2769 /* XXX CLEANUP HERE */
2770 printf("RAID auto config: out of memory!\n");
2771 return(NULL); /* XXX probably should panic? */
2772 }
2773
2774 if (!raidread_component_label(dev, vp, clabel)) {
2775 /* Got the label. Does it look reasonable? */
2776 if (rf_reasonable_label(clabel) &&
2777 (clabel->partitionSize <=
2778 label.d_partitions[i].p_size)) {
2779 #if DEBUG
2780 printf("Component on: %s%c: %d\n",
2781 dv->dv_xname, 'a'+i,
2782 label.d_partitions[i].p_size);
2783 rf_print_component_label(clabel);
2784 #endif
2785 /* if it's reasonable, add it,
2786 else ignore it. */
2787 ac = (RF_AutoConfig_t *)
2788 malloc(sizeof(RF_AutoConfig_t),
2789 M_RAIDFRAME,
2790 M_NOWAIT);
2791 if (ac == NULL) {
2792 /* XXX should panic?? */
2793 return(NULL);
2794 }
2795
2796 sprintf(ac->devname, "%s%c",
2797 dv->dv_xname, 'a'+i);
2798 ac->dev = dev;
2799 ac->vp = vp;
2800 ac->clabel = clabel;
2801 ac->next = ac_list;
2802 ac_list = ac;
2803 good_one = 1;
2804 }
2805 }
2806 if (!good_one) {
2807 /* cleanup */
2808 free(clabel, M_RAIDFRAME);
2809 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2810 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2811 vput(vp);
2812 }
2813 }
2814 }
2815 return(ac_list);
2816 }
2817
2818 static int
2819 rf_reasonable_label(clabel)
2820 RF_ComponentLabel_t *clabel;
2821 {
2822
2823 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2824 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2825 ((clabel->clean == RF_RAID_CLEAN) ||
2826 (clabel->clean == RF_RAID_DIRTY)) &&
2827 clabel->row >=0 &&
2828 clabel->column >= 0 &&
2829 clabel->num_rows > 0 &&
2830 clabel->num_columns > 0 &&
2831 clabel->row < clabel->num_rows &&
2832 clabel->column < clabel->num_columns &&
2833 clabel->blockSize > 0 &&
2834 clabel->numBlocks > 0) {
2835 /* label looks reasonable enough... */
2836 return(1);
2837 }
2838 return(0);
2839 }
2840
2841
#if DEBUG
/*
 * Debug helper: dump every field of a component label to the console.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2870
2871 RF_ConfigSet_t *
2872 rf_create_auto_sets(ac_list)
2873 RF_AutoConfig_t *ac_list;
2874 {
2875 RF_AutoConfig_t *ac;
2876 RF_ConfigSet_t *config_sets;
2877 RF_ConfigSet_t *cset;
2878 RF_AutoConfig_t *ac_next;
2879
2880
2881 config_sets = NULL;
2882
2883 /* Go through the AutoConfig list, and figure out which components
2884 belong to what sets. */
2885 ac = ac_list;
2886 while(ac!=NULL) {
2887 /* we're going to putz with ac->next, so save it here
2888 for use at the end of the loop */
2889 ac_next = ac->next;
2890
2891 if (config_sets == NULL) {
2892 /* will need at least this one... */
2893 config_sets = (RF_ConfigSet_t *)
2894 malloc(sizeof(RF_ConfigSet_t),
2895 M_RAIDFRAME, M_NOWAIT);
2896 if (config_sets == NULL) {
2897 panic("rf_create_auto_sets: No memory!");
2898 }
2899 /* this one is easy :) */
2900 config_sets->ac = ac;
2901 config_sets->next = NULL;
2902 config_sets->rootable = 0;
2903 ac->next = NULL;
2904 } else {
2905 /* which set does this component fit into? */
2906 cset = config_sets;
2907 while(cset!=NULL) {
2908 if (rf_does_it_fit(cset, ac)) {
2909 /* looks like it matches... */
2910 ac->next = cset->ac;
2911 cset->ac = ac;
2912 break;
2913 }
2914 cset = cset->next;
2915 }
2916 if (cset==NULL) {
2917 /* didn't find a match above... new set..*/
2918 cset = (RF_ConfigSet_t *)
2919 malloc(sizeof(RF_ConfigSet_t),
2920 M_RAIDFRAME, M_NOWAIT);
2921 if (cset == NULL) {
2922 panic("rf_create_auto_sets: No memory!");
2923 }
2924 cset->ac = ac;
2925 ac->next = NULL;
2926 cset->next = config_sets;
2927 cset->rootable = 0;
2928 config_sets = cset;
2929 }
2930 }
2931 ac = ac_next;
2932 }
2933
2934
2935 return(config_sets);
2936 }
2937
2938 static int
2939 rf_does_it_fit(cset, ac)
2940 RF_ConfigSet_t *cset;
2941 RF_AutoConfig_t *ac;
2942 {
2943 RF_ComponentLabel_t *clabel1, *clabel2;
2944
2945 /* If this one matches the *first* one in the set, that's good
2946 enough, since the other members of the set would have been
2947 through here too... */
2948 /* note that we are not checking partitionSize here..
2949
2950 Note that we are also not checking the mod_counters here.
2951 If everything else matches execpt the mod_counter, that's
2952 good enough for this test. We will deal with the mod_counters
2953 a little later in the autoconfiguration process.
2954
2955 (clabel1->mod_counter == clabel2->mod_counter) &&
2956
2957 The reason we don't check for this is that failed disks
2958 will have lower modification counts. If those disks are
2959 not added to the set they used to belong to, then they will
2960 form their own set, which may result in 2 different sets,
2961 for example, competing to be configured at raid0, and
2962 perhaps competing to be the root filesystem set. If the
2963 wrong ones get configured, or both attempt to become /,
2964 weird behaviour and or serious lossage will occur. Thus we
2965 need to bring them into the fold here, and kick them out at
2966 a later point.
2967
2968 */
2969
2970 clabel1 = cset->ac->clabel;
2971 clabel2 = ac->clabel;
2972 if ((clabel1->version == clabel2->version) &&
2973 (clabel1->serial_number == clabel2->serial_number) &&
2974 (clabel1->num_rows == clabel2->num_rows) &&
2975 (clabel1->num_columns == clabel2->num_columns) &&
2976 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2977 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2978 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2979 (clabel1->parityConfig == clabel2->parityConfig) &&
2980 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2981 (clabel1->blockSize == clabel2->blockSize) &&
2982 (clabel1->numBlocks == clabel2->numBlocks) &&
2983 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2984 (clabel1->root_partition == clabel2->root_partition) &&
2985 (clabel1->last_unit == clabel2->last_unit) &&
2986 (clabel1->config_order == clabel2->config_order)) {
2987 /* if it get's here, it almost *has* to be a match */
2988 } else {
2989 /* it's not consistent with somebody in the set..
2990 punt */
2991 return(0);
2992 }
2993 /* all was fine.. it must fit... */
2994 return(1);
2995 }
2996
/*
 * Decide whether a config set has enough live components (at the
 * set's highest mod_counter) to be configured.  Returns 1 if so,
 * 0 if too many components are missing for the parity type.
 */
int
rf_have_enough_components(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int r,c;
	int num_rows;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_rows = cset->ac->clabel->num_rows;
	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   (the highest one seen wins -- stale components have lower ones) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For every (row,col) slot, look for a component with a
	   current mod_counter claiming that position. */
	for(r=0; r<num_rows; r++) {
		even_pair_failed = 0;
		for(c=0; c<num_cols; c++) {
			ac = auto_config;
			while(ac!=NULL) {
				if ((ac->clabel->row == r) &&
				    (ac->clabel->column == c) &&
				    (ac->clabel->mod_counter == mod_counter)) {
					/* it's this one... */
#if DEBUG
					printf("Found: %s at %d,%d\n",
					       ac->devname,r,c);
#endif
					break;
				}
				ac=ac->next;
			}
			if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
				if (parity_type == '1') {
					if (c%2 == 0) { /* even component */
						even_pair_failed = 1;
					} else { /* odd component.  If
                                                    we're failed, and
                                                    so is the even
                                                    component, it's
                                                    "Good Night, Charlie" */
						if (even_pair_failed == 1) {
							return(0);
						}
					}
				} else {
					/* normal accounting */
					num_missing++;
				}
			}
			if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
				even_pair_failed = 0;
			}
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3105
3106 void
3107 rf_create_configuration(ac,config,raidPtr)
3108 RF_AutoConfig_t *ac;
3109 RF_Config_t *config;
3110 RF_Raid_t *raidPtr;
3111 {
3112 RF_ComponentLabel_t *clabel;
3113 int i;
3114
3115 clabel = ac->clabel;
3116
3117 /* 1. Fill in the common stuff */
3118 config->numRow = clabel->num_rows;
3119 config->numCol = clabel->num_columns;
3120 config->numSpare = 0; /* XXX should this be set here? */
3121 config->sectPerSU = clabel->sectPerSU;
3122 config->SUsPerPU = clabel->SUsPerPU;
3123 config->SUsPerRU = clabel->SUsPerRU;
3124 config->parityConfig = clabel->parityConfig;
3125 /* XXX... */
3126 strcpy(config->diskQueueType,"fifo");
3127 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3128 config->layoutSpecificSize = 0; /* XXX ?? */
3129
3130 while(ac!=NULL) {
3131 /* row/col values will be in range due to the checks
3132 in reasonable_label() */
3133 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3134 ac->devname);
3135 ac = ac->next;
3136 }
3137
3138 for(i=0;i<RF_MAXDBGV;i++) {
3139 config->debugVars[i][0] = NULL;
3140 }
3141 }
3142
3143 int
3144 rf_set_autoconfig(raidPtr, new_value)
3145 RF_Raid_t *raidPtr;
3146 int new_value;
3147 {
3148 RF_ComponentLabel_t clabel;
3149 struct vnode *vp;
3150 dev_t dev;
3151 int row, column;
3152
3153 raidPtr->autoconfigure = new_value;
3154 for(row=0; row<raidPtr->numRow; row++) {
3155 for(column=0; column<raidPtr->numCol; column++) {
3156 if (raidPtr->Disks[row][column].status ==
3157 rf_ds_optimal) {
3158 dev = raidPtr->Disks[row][column].dev;
3159 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3160 raidread_component_label(dev, vp, &clabel);
3161 clabel.autoconfigure = new_value;
3162 raidwrite_component_label(dev, vp, &clabel);
3163 }
3164 }
3165 }
3166 return(new_value);
3167 }
3168
3169 int
3170 rf_set_rootpartition(raidPtr, new_value)
3171 RF_Raid_t *raidPtr;
3172 int new_value;
3173 {
3174 RF_ComponentLabel_t clabel;
3175 struct vnode *vp;
3176 dev_t dev;
3177 int row, column;
3178
3179 raidPtr->root_partition = new_value;
3180 for(row=0; row<raidPtr->numRow; row++) {
3181 for(column=0; column<raidPtr->numCol; column++) {
3182 if (raidPtr->Disks[row][column].status ==
3183 rf_ds_optimal) {
3184 dev = raidPtr->Disks[row][column].dev;
3185 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3186 raidread_component_label(dev, vp, &clabel);
3187 clabel.root_partition = new_value;
3188 raidwrite_component_label(dev, vp, &clabel);
3189 }
3190 }
3191 }
3192 return(new_value);
3193 }
3194
3195 void
3196 rf_release_all_vps(cset)
3197 RF_ConfigSet_t *cset;
3198 {
3199 RF_AutoConfig_t *ac;
3200
3201 ac = cset->ac;
3202 while(ac!=NULL) {
3203 /* Close the vp, and give it back */
3204 if (ac->vp) {
3205 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3206 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3207 vput(ac->vp);
3208 ac->vp = NULL;
3209 }
3210 ac = ac->next;
3211 }
3212 }
3213
3214
3215 void
3216 rf_cleanup_config_set(cset)
3217 RF_ConfigSet_t *cset;
3218 {
3219 RF_AutoConfig_t *ac;
3220 RF_AutoConfig_t *next_ac;
3221
3222 ac = cset->ac;
3223 while(ac!=NULL) {
3224 next_ac = ac->next;
3225 /* nuke the label */
3226 free(ac->clabel, M_RAIDFRAME);
3227 /* cleanup the config structure */
3228 free(ac, M_RAIDFRAME);
3229 /* "next.." */
3230 ac = next_ac;
3231 }
3232 /* and, finally, nuke the config set */
3233 free(cset, M_RAIDFRAME);
3234 }
3235
3236
3237 void
3238 raid_init_component_label(raidPtr, clabel)
3239 RF_Raid_t *raidPtr;
3240 RF_ComponentLabel_t *clabel;
3241 {
3242 /* current version number */
3243 clabel->version = RF_COMPONENT_LABEL_VERSION;
3244 clabel->serial_number = raidPtr->serial_number;
3245 clabel->mod_counter = raidPtr->mod_counter;
3246 clabel->num_rows = raidPtr->numRow;
3247 clabel->num_columns = raidPtr->numCol;
3248 clabel->clean = RF_RAID_DIRTY; /* not clean */
3249 clabel->status = rf_ds_optimal; /* "It's good!" */
3250
3251 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3252 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3253 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3254
3255 clabel->blockSize = raidPtr->bytesPerSector;
3256 clabel->numBlocks = raidPtr->sectorsPerDisk;
3257
3258 /* XXX not portable */
3259 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3260 clabel->maxOutstanding = raidPtr->maxOutstanding;
3261 clabel->autoconfigure = raidPtr->autoconfigure;
3262 clabel->root_partition = raidPtr->root_partition;
3263 clabel->last_unit = raidPtr->raidid;
3264 clabel->config_order = raidPtr->config_order;
3265 }
3266
3267 int
3268 rf_auto_config_set(cset,unit)
3269 RF_ConfigSet_t *cset;
3270 int *unit;
3271 {
3272 RF_Raid_t *raidPtr;
3273 RF_Config_t *config;
3274 int raidID;
3275 int retcode;
3276
3277 #if DEBUG
3278 printf("RAID autoconfigure\n");
3279 #endif
3280
3281 retcode = 0;
3282 *unit = -1;
3283
3284 /* 1. Create a config structure */
3285
3286 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3287 M_RAIDFRAME,
3288 M_NOWAIT);
3289 if (config==NULL) {
3290 printf("Out of mem!?!?\n");
3291 /* XXX do something more intelligent here. */
3292 return(1);
3293 }
3294
3295 memset(config, 0, sizeof(RF_Config_t));
3296
3297 /*
3298 2. Figure out what RAID ID this one is supposed to live at
3299 See if we can get the same RAID dev that it was configured
3300 on last time..
3301 */
3302
3303 raidID = cset->ac->clabel->last_unit;
3304 if ((raidID < 0) || (raidID >= numraid)) {
3305 /* let's not wander off into lala land. */
3306 raidID = numraid - 1;
3307 }
3308 if (raidPtrs[raidID]->valid != 0) {
3309
3310 /*
3311 Nope... Go looking for an alternative...
3312 Start high so we don't immediately use raid0 if that's
3313 not taken.
3314 */
3315
3316 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3317 if (raidPtrs[raidID]->valid == 0) {
3318 /* can use this one! */
3319 break;
3320 }
3321 }
3322 }
3323
3324 if (raidID < 0) {
3325 /* punt... */
3326 printf("Unable to auto configure this set!\n");
3327 printf("(Out of RAID devs!)\n");
3328 return(1);
3329 }
3330
3331 #if DEBUG
3332 printf("Configuring raid%d:\n",raidID);
3333 #endif
3334
3335 raidPtr = raidPtrs[raidID];
3336
3337 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3338 raidPtr->raidid = raidID;
3339 raidPtr->openings = RAIDOUTSTANDING;
3340
3341 /* 3. Build the configuration structure */
3342 rf_create_configuration(cset->ac, config, raidPtr);
3343
3344 /* 4. Do the configuration */
3345 retcode = rf_Configure(raidPtr, config, cset->ac);
3346
3347 if (retcode == 0) {
3348
3349 raidinit(raidPtrs[raidID]);
3350
3351 rf_markalldirty(raidPtrs[raidID]);
3352 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3353 if (cset->ac->clabel->root_partition==1) {
3354 /* everything configured just fine. Make a note
3355 that this set is eligible to be root. */
3356 cset->rootable = 1;
3357 /* XXX do this here? */
3358 raidPtrs[raidID]->root_partition = 1;
3359 }
3360 }
3361
3362 /* 5. Cleanup */
3363 free(config, M_RAIDFRAME);
3364
3365 *unit = raidID;
3366 return(retcode);
3367 }
3368
3369 void
3370 rf_disk_unbusy(desc)
3371 RF_RaidAccessDesc_t *desc;
3372 {
3373 struct buf *bp;
3374
3375 bp = (struct buf *)desc->bp;
3376 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3377 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3378 }
3379