1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.18 2002/12/11 06:38:34 thorpej Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101  * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.18 2002/12/11 06:38:34 thorpej Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/proc.h>
123 #include <sys/queue.h>
124 #include <sys/disk.h>
125 #include <sys/device.h>
126 #include <sys/stat.h>
127 #include <sys/ioctl.h>
128 #include <sys/fcntl.h>
129 #include <sys/systm.h>
130 #include <sys/namei.h>
131 #include <sys/vnode.h>
132 #include <sys/disklabel.h>
133 #include <sys/conf.h>
134 #include <sys/lock.h>
135 #include <sys/buf.h>
136 #include <sys/user.h>
137 #include <sys/reboot.h>
138
139 #include <dev/raidframe/raidframevar.h>
140 #include <dev/raidframe/raidframeio.h>
141 #include "raid.h"
142 #include "opt_raid_autoconfig.h"
143 #include "rf_raid.h"
144 #include "rf_copyback.h"
145 #include "rf_dag.h"
146 #include "rf_dagflags.h"
147 #include "rf_desc.h"
148 #include "rf_diskqueue.h"
149 #include "rf_etimer.h"
150 #include "rf_general.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_threadstuff.h"
156
157 #ifdef DEBUG
158 int rf_kdebug_level = 0;
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit(RF_Raid_t *);
181
182 void raidattach(int);
183
184 dev_type_open(raidopen);
185 dev_type_close(raidclose);
186 dev_type_read(raidread);
187 dev_type_write(raidwrite);
188 dev_type_ioctl(raidioctl);
189 dev_type_strategy(raidstrategy);
190 dev_type_dump(raiddump);
191 dev_type_size(raidsize);
192
193 const struct bdevsw raid_bdevsw = {
194 raidopen, raidclose, raidstrategy, raidioctl,
195 raiddump, raidsize, D_DISK
196 };
197
198 const struct cdevsw raid_cdevsw = {
199 raidopen, raidclose, raidread, raidwrite, raidioctl,
200 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
201 };
202
203 /*
204 * Pilfered from ccd.c
205 */
206
207 struct raidbuf {
208 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
209 struct buf *rf_obp; /* ptr. to original I/O buf */
210 RF_DiskQueueData_t *req;/* the request that this was part of.. */
211 };
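/*
 * A note on the "MUST BE FIRST" requirement above: the disk I/O completion
 * callback only receives a struct buf *, and because rf_buf is the first
 * member the driver can cast that pointer back to the enclosing struct
 * raidbuf.  KernelWakeupFunc() below relies on exactly this idiom, roughly:
 *
 *	struct raidbuf *raidbp = (struct raidbuf *) vbp;
 *	struct buf *obp = raidbp->rf_obp;
 *
 * Moving rf_buf out of the first slot would silently break that cast.
 */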
212
213 /* component buffer pool */
214 struct pool raidframe_cbufpool;
215
216 /* XXX Not sure if the following should be replacing the raidPtrs above,
217 or if it should be used in conjunction with that...
218 */
219
220 struct raid_softc {
221 int sc_flags; /* flags */
222 int sc_cflags; /* configuration flags */
223 size_t sc_size; /* size of the raid device */
224 char sc_xname[20]; /* XXX external name */
225 struct disk sc_dkdev; /* generic disk device info */
226 struct bufq_state buf_queue; /* used for the device queue */
227 };
228 /* sc_flags */
229 #define RAIDF_INITED 0x01 /* unit has been initialized */
230 #define RAIDF_WLABEL 0x02 /* label area is writable */
231 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
232 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
233 #define RAIDF_LOCKED 0x80 /* unit is locked */
234
235 #define raidunit(x) DISKUNIT(x)
236 int numraid = 0;
237
238 /*
239 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
240 * Be aware that large numbers can allow the driver to consume a lot of
241 * kernel memory, especially on writes, and in degraded mode reads.
242 *
243 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
244 * a single 64K write will typically require 64K for the old data,
245 * 64K for the old parity, and 64K for the new parity, for a total
246 * of 192K (if the parity buffer is not re-used immediately).
247  * Even if it is used immediately, that's still 128K, which when multiplied
248 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
249 *
250 * Now in degraded mode, for example, a 64K read on the above setup may
251 * require data reconstruction, which will require *all* of the 4 remaining
252 * disks to participate -- 4 * 32K/disk == 128K again.
253 */
254
255 #ifndef RAIDOUTSTANDING
256 #define RAIDOUTSTANDING 6
257 #endif
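/*
 * A rough sketch of the arithmetic behind the comment above (an estimate
 * only; it ignores per-request descriptor and DAG overhead):
 *
 *	buffers per full-stripe write ~= 2-3 * (stripe width in bytes)
 *	worst case in flight          ~= RAIDOUTSTANDING *
 *	                                 (buffers per write + incoming data)
 *
 * e.g. six outstanding 64K writes on the 5-disk/32K example above come to
 * roughly 6 * (192K + 64K) = 1536K of kernel memory tied up at once.
 */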
258
259 #define RAIDLABELDEV(dev) \
260 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
261
262 /* declared here, and made public, for the benefit of KVM stuff.. */
263 struct raid_softc *raid_softc;
264
265 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
266 struct disklabel *);
267 static void raidgetdisklabel(dev_t);
268 static void raidmakedisklabel(struct raid_softc *);
269
270 static int raidlock(struct raid_softc *);
271 static void raidunlock(struct raid_softc *);
272
273 static void rf_markalldirty(RF_Raid_t *);
274
275 struct device *raidrootdev;
276
277 void rf_ReconThread(struct rf_recon_req *);
278 /* XXX what I want is: */
279 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
280 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
281 void rf_CopybackThread(RF_Raid_t *raidPtr);
282 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
283 int rf_autoconfig(struct device *self);
284 void rf_buildroothack(RF_ConfigSet_t *);
285
286 RF_AutoConfig_t *rf_find_raid_components(void);
287 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
288 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
289 static int rf_reasonable_label(RF_ComponentLabel_t *);
290 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
291 int rf_set_autoconfig(RF_Raid_t *, int);
292 int rf_set_rootpartition(RF_Raid_t *, int);
293 void rf_release_all_vps(RF_ConfigSet_t *);
294 void rf_cleanup_config_set(RF_ConfigSet_t *);
295 int rf_have_enough_components(RF_ConfigSet_t *);
296 int rf_auto_config_set(RF_ConfigSet_t *, int *);
297
298 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
299 allow autoconfig to take place.
300 Note that this is overridden by having
301 RAID_AUTOCONFIG as an option in the
302 kernel config file. */
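/*
 * For example, a kernel built with
 *
 *	options 	RAID_AUTOCONFIG
 *
 * in its configuration file (which is what opt_raid_autoconfig.h above
 * carries) will have raidattach() flip raidautoconfig to 1, enabling the
 * component search in rf_autoconfig() at boot.
 */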
303
304 void
305 raidattach(num)
306 int num;
307 {
308 int raidID;
309 int i, rc;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!");
330 }
331
332 /* Initialize the component buffer pool. */
333 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
334 0, 0, "raidpl", NULL);
335
336 rc = rf_mutex_init(&rf_sparet_wait_mutex);
337 if (rc) {
338 RF_PANIC();
339 }
340
341 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
342
343 for (i = 0; i < num; i++)
344 raidPtrs[i] = NULL;
345 rc = rf_BootRaidframe();
346 if (rc == 0)
347 printf("Kernelized RAIDframe activated\n");
348 else
349 panic("Serious error booting RAID!!");
350
351 	/* put together some data structures like the CCD device does.  This
352 	 * lets us lock the device and what-not when it gets opened. */
353
354 raid_softc = (struct raid_softc *)
355 malloc(num * sizeof(struct raid_softc),
356 M_RAIDFRAME, M_NOWAIT);
357 if (raid_softc == NULL) {
358 printf("WARNING: no memory for RAIDframe driver\n");
359 return;
360 }
361
362 memset(raid_softc, 0, num * sizeof(struct raid_softc));
363
364 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raidrootdev == NULL) {
367 panic("No memory for RAIDframe driver!!?!?!");
368 }
369
370 for (raidID = 0; raidID < num; raidID++) {
371 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
372
373 raidrootdev[raidID].dv_class = DV_DISK;
374 raidrootdev[raidID].dv_cfdata = NULL;
375 raidrootdev[raidID].dv_unit = raidID;
376 raidrootdev[raidID].dv_parent = NULL;
377 raidrootdev[raidID].dv_flags = 0;
378 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
379
380 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
381 (RF_Raid_t *));
382 if (raidPtrs[raidID] == NULL) {
383 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
384 numraid = raidID;
385 return;
386 }
387 }
388
389 #ifdef RAID_AUTOCONFIG
390 raidautoconfig = 1;
391 #endif
392
393 /*
394 * Register a finalizer which will be used to auto-config RAID
395 * sets once all real hardware devices have been found.
396 */
397 if (config_finalize_register(NULL, rf_autoconfig) != 0)
398 printf("WARNING: unable to register RAIDframe finalizer\n");
399 }
400
401 int
402 rf_autoconfig(struct device *self)
403 {
404 RF_AutoConfig_t *ac_list;
405 RF_ConfigSet_t *config_sets;
406
407 if (raidautoconfig == 0)
408 return (0);
409
410 /* XXX This code can only be run once. */
411 raidautoconfig = 0;
412
413 /* 1. locate all RAID components on the system */
414 #ifdef DEBUG
415 printf("Searching for RAID components...\n");
416 #endif
417 ac_list = rf_find_raid_components();
418
419 /* 2. Sort them into their respective sets. */
420 config_sets = rf_create_auto_sets(ac_list);
421
422 /*
423 	 * 3. Evaluate each set and configure the valid ones.
424 * This gets done in rf_buildroothack().
425 */
426 rf_buildroothack(config_sets);
427
428 return (1);
429 }
430
431 void
432 rf_buildroothack(RF_ConfigSet_t *config_sets)
433 {
434 RF_ConfigSet_t *cset;
435 RF_ConfigSet_t *next_cset;
436 int retcode;
437 int raidID;
438 int rootID;
439 int num_root;
440
441 rootID = 0;
442 num_root = 0;
443 cset = config_sets;
444 while(cset != NULL ) {
445 next_cset = cset->next;
446 if (rf_have_enough_components(cset) &&
447 cset->ac->clabel->autoconfigure==1) {
448 retcode = rf_auto_config_set(cset,&raidID);
449 if (!retcode) {
450 if (cset->rootable) {
451 rootID = raidID;
452 num_root++;
453 }
454 } else {
455 /* The autoconfig didn't work :( */
456 #if DEBUG
457 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
458 #endif
459 rf_release_all_vps(cset);
460 }
461 } else {
462 /* we're not autoconfiguring this set...
463 release the associated resources */
464 rf_release_all_vps(cset);
465 }
466 /* cleanup */
467 rf_cleanup_config_set(cset);
468 cset = next_cset;
469 }
470
471 /* we found something bootable... */
472
473 if (num_root == 1) {
474 booted_device = &raidrootdev[rootID];
475 } else if (num_root > 1) {
476 /* we can't guess.. require the user to answer... */
477 boothowto |= RB_ASKNAME;
478 }
479 }
480
481
482 int
483 raidsize(dev)
484 dev_t dev;
485 {
486 struct raid_softc *rs;
487 struct disklabel *lp;
488 int part, unit, omask, size;
489
490 unit = raidunit(dev);
491 if (unit >= numraid)
492 return (-1);
493 rs = &raid_softc[unit];
494
495 if ((rs->sc_flags & RAIDF_INITED) == 0)
496 return (-1);
497
498 part = DISKPART(dev);
499 omask = rs->sc_dkdev.dk_openmask & (1 << part);
500 lp = rs->sc_dkdev.dk_label;
501
502 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
503 return (-1);
504
505 if (lp->d_partitions[part].p_fstype != FS_SWAP)
506 size = -1;
507 else
508 size = lp->d_partitions[part].p_size *
509 (lp->d_secsize / DEV_BSIZE);
510
511 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
512 return (-1);
513
514 return (size);
515
516 }
517
518 int
519 raiddump(dev, blkno, va, size)
520 dev_t dev;
521 daddr_t blkno;
522 caddr_t va;
523 size_t size;
524 {
525 /* Not implemented. */
526 return ENXIO;
527 }
528 /* ARGSUSED */
529 int
530 raidopen(dev, flags, fmt, p)
531 dev_t dev;
532 int flags, fmt;
533 struct proc *p;
534 {
535 int unit = raidunit(dev);
536 struct raid_softc *rs;
537 struct disklabel *lp;
538 int part, pmask;
539 int error = 0;
540
541 if (unit >= numraid)
542 return (ENXIO);
543 rs = &raid_softc[unit];
544
545 if ((error = raidlock(rs)) != 0)
546 return (error);
547 lp = rs->sc_dkdev.dk_label;
548
549 part = DISKPART(dev);
550 pmask = (1 << part);
551
552 if ((rs->sc_flags & RAIDF_INITED) &&
553 (rs->sc_dkdev.dk_openmask == 0))
554 raidgetdisklabel(dev);
555
556 /* make sure that this partition exists */
557
558 if (part != RAW_PART) {
559 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
560 ((part >= lp->d_npartitions) ||
561 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
562 error = ENXIO;
563 raidunlock(rs);
564 return (error);
565 }
566 }
567 /* Prevent this unit from being unconfigured while open. */
568 switch (fmt) {
569 case S_IFCHR:
570 rs->sc_dkdev.dk_copenmask |= pmask;
571 break;
572
573 case S_IFBLK:
574 rs->sc_dkdev.dk_bopenmask |= pmask;
575 break;
576 }
577
578 if ((rs->sc_dkdev.dk_openmask == 0) &&
579 ((rs->sc_flags & RAIDF_INITED) != 0)) {
580 /* First one... mark things as dirty... Note that we *MUST*
581 have done a configure before this. I DO NOT WANT TO BE
582 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
583 THAT THEY BELONG TOGETHER!!!!! */
584 /* XXX should check to see if we're only open for reading
585 here... If so, we needn't do this, but then need some
586 other way of keeping track of what's happened.. */
587
588 rf_markalldirty( raidPtrs[unit] );
589 }
590
591
592 rs->sc_dkdev.dk_openmask =
593 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
594
595 raidunlock(rs);
596
597 return (error);
598
599
600 }
601 /* ARGSUSED */
602 int
603 raidclose(dev, flags, fmt, p)
604 dev_t dev;
605 int flags, fmt;
606 struct proc *p;
607 {
608 int unit = raidunit(dev);
609 struct raid_softc *rs;
610 int error = 0;
611 int part;
612
613 if (unit >= numraid)
614 return (ENXIO);
615 rs = &raid_softc[unit];
616
617 if ((error = raidlock(rs)) != 0)
618 return (error);
619
620 part = DISKPART(dev);
621
622 /* ...that much closer to allowing unconfiguration... */
623 switch (fmt) {
624 case S_IFCHR:
625 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
626 break;
627
628 case S_IFBLK:
629 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
630 break;
631 }
632 rs->sc_dkdev.dk_openmask =
633 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
634
635 if ((rs->sc_dkdev.dk_openmask == 0) &&
636 ((rs->sc_flags & RAIDF_INITED) != 0)) {
637 		/* Last one... the device is not unconfigured yet.
638 		   Mark things as clean... (Device shutdown has
639 		   already taken care of setting the clean bits
640 		   if RAIDF_INITED is not set.) */
641
642 rf_update_component_labels(raidPtrs[unit],
643 RF_FINAL_COMPONENT_UPDATE);
644 if (doing_shutdown) {
645 /* last one, and we're going down, so
646 lights out for this RAID set too. */
647 error = rf_Shutdown(raidPtrs[unit]);
648
649 /* It's no longer initialized... */
650 rs->sc_flags &= ~RAIDF_INITED;
651
652 /* Detach the disk. */
653 disk_detach(&rs->sc_dkdev);
654 }
655 }
656
657 raidunlock(rs);
658 return (0);
659
660 }
661
662 void
663 raidstrategy(bp)
664 struct buf *bp;
665 {
666 int s;
667
668 unsigned int raidID = raidunit(bp->b_dev);
669 RF_Raid_t *raidPtr;
670 struct raid_softc *rs = &raid_softc[raidID];
671 struct disklabel *lp;
672 int wlabel;
673
674 if ((rs->sc_flags & RAIDF_INITED) ==0) {
675 bp->b_error = ENXIO;
676 bp->b_flags |= B_ERROR;
677 bp->b_resid = bp->b_bcount;
678 biodone(bp);
679 return;
680 }
681 if (raidID >= numraid || !raidPtrs[raidID]) {
682 bp->b_error = ENODEV;
683 bp->b_flags |= B_ERROR;
684 bp->b_resid = bp->b_bcount;
685 biodone(bp);
686 return;
687 }
688 raidPtr = raidPtrs[raidID];
689 if (!raidPtr->valid) {
690 bp->b_error = ENODEV;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 if (bp->b_bcount == 0) {
697 db1_printf(("b_bcount is zero..\n"));
698 biodone(bp);
699 return;
700 }
701 lp = rs->sc_dkdev.dk_label;
702
703 /*
704 * Do bounds checking and adjust transfer. If there's an
705 * error, the bounds check will flag that for us.
706 */
707
708 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
709 if (DISKPART(bp->b_dev) != RAW_PART)
710 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
711 db1_printf(("Bounds check failed!!:%d %d\n",
712 (int) bp->b_blkno, (int) wlabel));
713 biodone(bp);
714 return;
715 }
716 s = splbio();
717
718 bp->b_resid = 0;
719
720 /* stuff it onto our queue */
721 BUFQ_PUT(&rs->buf_queue, bp);
722
723 raidstart(raidPtrs[raidID]);
724
725 splx(s);
726 }
727 /* ARGSUSED */
728 int
729 raidread(dev, uio, flags)
730 dev_t dev;
731 struct uio *uio;
732 int flags;
733 {
734 int unit = raidunit(dev);
735 struct raid_softc *rs;
736 int part;
737
738 if (unit >= numraid)
739 return (ENXIO);
740 rs = &raid_softc[unit];
741
742 if ((rs->sc_flags & RAIDF_INITED) == 0)
743 return (ENXIO);
744 part = DISKPART(dev);
745
746 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
747
748 }
749 /* ARGSUSED */
750 int
751 raidwrite(dev, uio, flags)
752 dev_t dev;
753 struct uio *uio;
754 int flags;
755 {
756 int unit = raidunit(dev);
757 struct raid_softc *rs;
758
759 if (unit >= numraid)
760 return (ENXIO);
761 rs = &raid_softc[unit];
762
763 if ((rs->sc_flags & RAIDF_INITED) == 0)
764 return (ENXIO);
765
766 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
767
768 }
769
770 int
771 raidioctl(dev, cmd, data, flag, p)
772 dev_t dev;
773 u_long cmd;
774 caddr_t data;
775 int flag;
776 struct proc *p;
777 {
778 int unit = raidunit(dev);
779 int error = 0;
780 int part, pmask;
781 struct raid_softc *rs;
782 RF_Config_t *k_cfg, *u_cfg;
783 RF_Raid_t *raidPtr;
784 RF_RaidDisk_t *diskPtr;
785 RF_AccTotals_t *totals;
786 RF_DeviceConfig_t *d_cfg, **ucfgp;
787 u_char *specific_buf;
788 int retcode = 0;
789 int row;
790 int column;
791 int raidid;
792 struct rf_recon_req *rrcopy, *rr;
793 RF_ComponentLabel_t *clabel;
794 RF_ComponentLabel_t ci_label;
795 RF_ComponentLabel_t **clabel_ptr;
796 RF_SingleComponent_t *sparePtr,*componentPtr;
797 RF_SingleComponent_t hot_spare;
798 RF_SingleComponent_t component;
799 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
800 int i, j, d;
801 #ifdef __HAVE_OLD_DISKLABEL
802 struct disklabel newlabel;
803 #endif
804
805 if (unit >= numraid)
806 return (ENXIO);
807 rs = &raid_softc[unit];
808 raidPtr = raidPtrs[unit];
809
810 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
811 (int) DISKPART(dev), (int) unit, (int) cmd));
812
813 /* Must be open for writes for these commands... */
814 switch (cmd) {
815 case DIOCSDINFO:
816 case DIOCWDINFO:
817 #ifdef __HAVE_OLD_DISKLABEL
818 case ODIOCWDINFO:
819 case ODIOCSDINFO:
820 #endif
821 case DIOCWLABEL:
822 if ((flag & FWRITE) == 0)
823 return (EBADF);
824 }
825
826 /* Must be initialized for these... */
827 switch (cmd) {
828 case DIOCGDINFO:
829 case DIOCSDINFO:
830 case DIOCWDINFO:
831 #ifdef __HAVE_OLD_DISKLABEL
832 case ODIOCGDINFO:
833 case ODIOCWDINFO:
834 case ODIOCSDINFO:
835 case ODIOCGDEFLABEL:
836 #endif
837 case DIOCGPART:
838 case DIOCWLABEL:
839 case DIOCGDEFLABEL:
840 case RAIDFRAME_SHUTDOWN:
841 case RAIDFRAME_REWRITEPARITY:
842 case RAIDFRAME_GET_INFO:
843 case RAIDFRAME_RESET_ACCTOTALS:
844 case RAIDFRAME_GET_ACCTOTALS:
845 case RAIDFRAME_KEEP_ACCTOTALS:
846 case RAIDFRAME_GET_SIZE:
847 case RAIDFRAME_FAIL_DISK:
848 case RAIDFRAME_COPYBACK:
849 case RAIDFRAME_CHECK_RECON_STATUS:
850 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
851 case RAIDFRAME_GET_COMPONENT_LABEL:
852 case RAIDFRAME_SET_COMPONENT_LABEL:
853 case RAIDFRAME_ADD_HOT_SPARE:
854 case RAIDFRAME_REMOVE_HOT_SPARE:
855 case RAIDFRAME_INIT_LABELS:
856 case RAIDFRAME_REBUILD_IN_PLACE:
857 case RAIDFRAME_CHECK_PARITY:
858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
860 case RAIDFRAME_CHECK_COPYBACK_STATUS:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
862 case RAIDFRAME_SET_AUTOCONFIG:
863 case RAIDFRAME_SET_ROOT:
864 case RAIDFRAME_DELETE_COMPONENT:
865 case RAIDFRAME_INCORPORATE_HOT_SPARE:
866 if ((rs->sc_flags & RAIDF_INITED) == 0)
867 return (ENXIO);
868 }
869
870 switch (cmd) {
871
872 /* configure the system */
873 case RAIDFRAME_CONFIGURE:
874
875 if (raidPtr->valid) {
876 /* There is a valid RAID set running on this unit! */
877 printf("raid%d: Device already configured!\n",unit);
878 return(EINVAL);
879 }
880
881 /* copy-in the configuration information */
882 /* data points to a pointer to the configuration structure */
883
884 u_cfg = *((RF_Config_t **) data);
885 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
886 if (k_cfg == NULL) {
887 return (ENOMEM);
888 }
889 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
890 sizeof(RF_Config_t));
891 if (retcode) {
892 RF_Free(k_cfg, sizeof(RF_Config_t));
893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
894 retcode));
895 return (retcode);
896 }
897 /* allocate a buffer for the layout-specific data, and copy it
898 * in */
899 if (k_cfg->layoutSpecificSize) {
900 if (k_cfg->layoutSpecificSize > 10000) {
901 /* sanity check */
902 RF_Free(k_cfg, sizeof(RF_Config_t));
903 return (EINVAL);
904 }
905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
906 (u_char *));
907 if (specific_buf == NULL) {
908 RF_Free(k_cfg, sizeof(RF_Config_t));
909 return (ENOMEM);
910 }
911 retcode = copyin(k_cfg->layoutSpecific,
912 (caddr_t) specific_buf,
913 k_cfg->layoutSpecificSize);
914 if (retcode) {
915 RF_Free(k_cfg, sizeof(RF_Config_t));
916 RF_Free(specific_buf,
917 k_cfg->layoutSpecificSize);
918 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
919 retcode));
920 return (retcode);
921 }
922 } else
923 specific_buf = NULL;
924 k_cfg->layoutSpecific = specific_buf;
925
926 /* should do some kind of sanity check on the configuration.
927 * Store the sum of all the bytes in the last byte? */
928
929 /* configure the system */
930
931 /*
932 * Clear the entire RAID descriptor, just to make sure
933 * there is no stale data left in the case of a
934 * reconfiguration
935 */
936 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
937 raidPtr->raidid = unit;
938
939 retcode = rf_Configure(raidPtr, k_cfg, NULL);
940
941 if (retcode == 0) {
942
943 /* allow this many simultaneous IO's to
944 this RAID device */
945 raidPtr->openings = RAIDOUTSTANDING;
946
947 raidinit(raidPtr);
948 rf_markalldirty(raidPtr);
949 }
950 /* free the buffers. No return code here. */
951 if (k_cfg->layoutSpecificSize) {
952 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
953 }
954 RF_Free(k_cfg, sizeof(RF_Config_t));
955
956 return (retcode);
957
958 /* shutdown the system */
959 case RAIDFRAME_SHUTDOWN:
960
961 if ((error = raidlock(rs)) != 0)
962 return (error);
963
964 /*
965 * If somebody has a partition mounted, we shouldn't
966 * shutdown.
967 */
968
969 part = DISKPART(dev);
970 pmask = (1 << part);
971 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
972 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
973 (rs->sc_dkdev.dk_copenmask & pmask))) {
974 raidunlock(rs);
975 return (EBUSY);
976 }
977
978 retcode = rf_Shutdown(raidPtr);
979
980 /* It's no longer initialized... */
981 rs->sc_flags &= ~RAIDF_INITED;
982
983 /* Detach the disk. */
984 disk_detach(&rs->sc_dkdev);
985
986 raidunlock(rs);
987
988 return (retcode);
989 case RAIDFRAME_GET_COMPONENT_LABEL:
990 clabel_ptr = (RF_ComponentLabel_t **) data;
991 /* need to read the component label for the disk indicated
992 by row,column in clabel */
993
994 		/* For practice, let's get it directly from disk, rather
995 than from the in-core copy */
996 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
997 (RF_ComponentLabel_t *));
998 if (clabel == NULL)
999 return (ENOMEM);
1000
1001 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1002
1003 retcode = copyin( *clabel_ptr, clabel,
1004 sizeof(RF_ComponentLabel_t));
1005
1006 if (retcode) {
1007 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1008 return(retcode);
1009 }
1010
1011 row = clabel->row;
1012 column = clabel->column;
1013
1014 if ((row < 0) || (row >= raidPtr->numRow) ||
1015 (column < 0) || (column >= raidPtr->numCol +
1016 raidPtr->numSpare)) {
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return(EINVAL);
1019 }
1020
1021 raidread_component_label(raidPtr->Disks[row][column].dev,
1022 raidPtr->raid_cinfo[row][column].ci_vp,
1023 clabel );
1024
1025 retcode = copyout((caddr_t) clabel,
1026 (caddr_t) *clabel_ptr,
1027 sizeof(RF_ComponentLabel_t));
1028 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1029 return (retcode);
1030
1031 case RAIDFRAME_SET_COMPONENT_LABEL:
1032 clabel = (RF_ComponentLabel_t *) data;
1033
1034 /* XXX check the label for valid stuff... */
1035 /* Note that some things *should not* get modified --
1036 the user should be re-initing the labels instead of
1037 trying to patch things.
1038 */
1039
1040 raidid = raidPtr->raidid;
1041 printf("raid%d: Got component label:\n", raidid);
1042 printf("raid%d: Version: %d\n", raidid, clabel->version);
1043 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1044 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1045 printf("raid%d: Row: %d\n", raidid, clabel->row);
1046 printf("raid%d: Column: %d\n", raidid, clabel->column);
1047 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1048 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1049 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1050 printf("raid%d: Status: %d\n", raidid, clabel->status);
1051
1052 row = clabel->row;
1053 column = clabel->column;
1054
1055 if ((row < 0) || (row >= raidPtr->numRow) ||
1056 (column < 0) || (column >= raidPtr->numCol)) {
1057 return(EINVAL);
1058 }
1059
1060 /* XXX this isn't allowed to do anything for now :-) */
1061
1062 /* XXX and before it is, we need to fill in the rest
1063 of the fields!?!?!?! */
1064 #if 0
1065 raidwrite_component_label(
1066 raidPtr->Disks[row][column].dev,
1067 raidPtr->raid_cinfo[row][column].ci_vp,
1068 clabel );
1069 #endif
1070 return (0);
1071
1072 case RAIDFRAME_INIT_LABELS:
1073 clabel = (RF_ComponentLabel_t *) data;
1074 /*
1075 we only want the serial number from
1076 the above. We get all the rest of the information
1077 from the config that was used to create this RAID
1078 set.
1079 */
1080
1081 raidPtr->serial_number = clabel->serial_number;
1082
1083 raid_init_component_label(raidPtr, &ci_label);
1084 ci_label.serial_number = clabel->serial_number;
1085
1086 for(row=0;row<raidPtr->numRow;row++) {
1087 ci_label.row = row;
1088 for(column=0;column<raidPtr->numCol;column++) {
1089 diskPtr = &raidPtr->Disks[row][column];
1090 if (!RF_DEAD_DISK(diskPtr->status)) {
1091 ci_label.partitionSize = diskPtr->partitionSize;
1092 ci_label.column = column;
1093 raidwrite_component_label(
1094 raidPtr->Disks[row][column].dev,
1095 raidPtr->raid_cinfo[row][column].ci_vp,
1096 &ci_label );
1097 }
1098 }
1099 }
1100
1101 return (retcode);
1102 case RAIDFRAME_SET_AUTOCONFIG:
1103 d = rf_set_autoconfig(raidPtr, *(int *) data);
1104 printf("raid%d: New autoconfig value is: %d\n",
1105 raidPtr->raidid, d);
1106 *(int *) data = d;
1107 return (retcode);
1108
1109 case RAIDFRAME_SET_ROOT:
1110 d = rf_set_rootpartition(raidPtr, *(int *) data);
1111 printf("raid%d: New rootpartition value is: %d\n",
1112 raidPtr->raidid, d);
1113 *(int *) data = d;
1114 return (retcode);
1115
1116 /* initialize all parity */
1117 case RAIDFRAME_REWRITEPARITY:
1118
1119 if (raidPtr->Layout.map->faultsTolerated == 0) {
1120 /* Parity for RAID 0 is trivially correct */
1121 raidPtr->parity_good = RF_RAID_CLEAN;
1122 return(0);
1123 }
1124
1125 if (raidPtr->parity_rewrite_in_progress == 1) {
1126 /* Re-write is already in progress! */
1127 return(EINVAL);
1128 }
1129
1130 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1131 rf_RewriteParityThread,
1132 raidPtr,"raid_parity");
1133 return (retcode);
1134
1135
1136 case RAIDFRAME_ADD_HOT_SPARE:
1137 sparePtr = (RF_SingleComponent_t *) data;
1138 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1139 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1140 return(retcode);
1141
1142 case RAIDFRAME_REMOVE_HOT_SPARE:
1143 return(retcode);
1144
1145 case RAIDFRAME_DELETE_COMPONENT:
1146 componentPtr = (RF_SingleComponent_t *)data;
1147 memcpy( &component, componentPtr,
1148 sizeof(RF_SingleComponent_t));
1149 retcode = rf_delete_component(raidPtr, &component);
1150 return(retcode);
1151
1152 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1153 componentPtr = (RF_SingleComponent_t *)data;
1154 memcpy( &component, componentPtr,
1155 sizeof(RF_SingleComponent_t));
1156 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1157 return(retcode);
1158
1159 case RAIDFRAME_REBUILD_IN_PLACE:
1160
1161 if (raidPtr->Layout.map->faultsTolerated == 0) {
1162 /* Can't do this on a RAID 0!! */
1163 return(EINVAL);
1164 }
1165
1166 if (raidPtr->recon_in_progress == 1) {
1167 /* a reconstruct is already in progress! */
1168 return(EINVAL);
1169 }
1170
1171 componentPtr = (RF_SingleComponent_t *) data;
1172 memcpy( &component, componentPtr,
1173 sizeof(RF_SingleComponent_t));
1174 row = component.row;
1175 column = component.column;
1176
1177 if ((row < 0) || (row >= raidPtr->numRow) ||
1178 (column < 0) || (column >= raidPtr->numCol)) {
1179 return(EINVAL);
1180 }
1181
1182 RF_LOCK_MUTEX(raidPtr->mutex);
1183 if ((raidPtr->Disks[row][column].status == rf_ds_optimal) &&
1184 (raidPtr->numFailures > 0)) {
1185 /* XXX 0 above shouldn't be constant!!! */
1186 /* some component other than this has failed.
1187 Let's not make things worse than they already
1188 are... */
1189 printf("raid%d: Unable to reconstruct to disk at:\n",
1190 raidPtr->raidid);
1191 printf("raid%d: Row: %d Col: %d Too many failures.\n",
1192 raidPtr->raidid, row, column);
1193 RF_UNLOCK_MUTEX(raidPtr->mutex);
1194 return (EINVAL);
1195 }
1196 if (raidPtr->Disks[row][column].status ==
1197 rf_ds_reconstructing) {
1198 printf("raid%d: Unable to reconstruct to disk at:\n",
1199 raidPtr->raidid);
1200 printf("raid%d: Row: %d Col: %d Reconstruction already occuring!\n", raidPtr->raidid, row, column);
1201
1202 RF_UNLOCK_MUTEX(raidPtr->mutex);
1203 return (EINVAL);
1204 }
1205 if (raidPtr->Disks[row][column].status == rf_ds_spared) {
1206 RF_UNLOCK_MUTEX(raidPtr->mutex);
1207 return (EINVAL);
1208 }
1209 RF_UNLOCK_MUTEX(raidPtr->mutex);
1210
1211 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1212 if (rrcopy == NULL)
1213 return(ENOMEM);
1214
1215 rrcopy->raidPtr = (void *) raidPtr;
1216 rrcopy->row = row;
1217 rrcopy->col = column;
1218
1219 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1220 rf_ReconstructInPlaceThread,
1221 rrcopy,"raid_reconip");
1222 return(retcode);
1223
1224 case RAIDFRAME_GET_INFO:
1225 if (!raidPtr->valid)
1226 return (ENODEV);
1227 ucfgp = (RF_DeviceConfig_t **) data;
1228 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1229 (RF_DeviceConfig_t *));
1230 if (d_cfg == NULL)
1231 return (ENOMEM);
1232 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1233 d_cfg->rows = raidPtr->numRow;
1234 d_cfg->cols = raidPtr->numCol;
1235 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1236 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1237 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1238 return (ENOMEM);
1239 }
1240 d_cfg->nspares = raidPtr->numSpare;
1241 if (d_cfg->nspares >= RF_MAX_DISKS) {
1242 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1243 return (ENOMEM);
1244 }
1245 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1246 d = 0;
1247 for (i = 0; i < d_cfg->rows; i++) {
1248 for (j = 0; j < d_cfg->cols; j++) {
1249 d_cfg->devs[d] = raidPtr->Disks[i][j];
1250 d++;
1251 }
1252 }
1253 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1254 d_cfg->spares[i] = raidPtr->Disks[0][j];
1255 }
1256 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1257 sizeof(RF_DeviceConfig_t));
1258 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1259
1260 return (retcode);
1261
1262 case RAIDFRAME_CHECK_PARITY:
1263 *(int *) data = raidPtr->parity_good;
1264 return (0);
1265
1266 case RAIDFRAME_RESET_ACCTOTALS:
1267 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1268 return (0);
1269
1270 case RAIDFRAME_GET_ACCTOTALS:
1271 totals = (RF_AccTotals_t *) data;
1272 *totals = raidPtr->acc_totals;
1273 return (0);
1274
1275 case RAIDFRAME_KEEP_ACCTOTALS:
1276 raidPtr->keep_acc_totals = *(int *)data;
1277 return (0);
1278
1279 case RAIDFRAME_GET_SIZE:
1280 *(int *) data = raidPtr->totalSectors;
1281 return (0);
1282
1283 /* fail a disk & optionally start reconstruction */
1284 case RAIDFRAME_FAIL_DISK:
1285
1286 if (raidPtr->Layout.map->faultsTolerated == 0) {
1287 /* Can't do this on a RAID 0!! */
1288 return(EINVAL);
1289 }
1290
1291 rr = (struct rf_recon_req *) data;
1292
1293 if (rr->row < 0 || rr->row >= raidPtr->numRow
1294 || rr->col < 0 || rr->col >= raidPtr->numCol)
1295 return (EINVAL);
1296
1297
1298 RF_LOCK_MUTEX(raidPtr->mutex);
1299 if ((raidPtr->Disks[rr->row][rr->col].status ==
1300 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1301 /* some other component has failed. Let's not make
1302 things worse. XXX wrong for RAID6 */
1303 RF_UNLOCK_MUTEX(raidPtr->mutex);
1304 return (EINVAL);
1305 }
1306 if (raidPtr->Disks[rr->row][rr->col].status == rf_ds_spared) {
1307 /* Can't fail a spared disk! */
1308 RF_UNLOCK_MUTEX(raidPtr->mutex);
1309 return (EINVAL);
1310 }
1311 RF_UNLOCK_MUTEX(raidPtr->mutex);
1312
1313 /* make a copy of the recon request so that we don't rely on
1314 * the user's buffer */
1315 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1316 if (rrcopy == NULL)
1317 return(ENOMEM);
1318 memcpy(rrcopy, rr, sizeof(*rr));
1319 rrcopy->raidPtr = (void *) raidPtr;
1320
1321 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1322 rf_ReconThread,
1323 rrcopy,"raid_recon");
1324 return (0);
1325
1326 /* invoke a copyback operation after recon on whatever disk
1327 * needs it, if any */
1328 case RAIDFRAME_COPYBACK:
1329
1330 if (raidPtr->Layout.map->faultsTolerated == 0) {
1331 /* This makes no sense on a RAID 0!! */
1332 return(EINVAL);
1333 }
1334
1335 if (raidPtr->copyback_in_progress == 1) {
1336 /* Copyback is already in progress! */
1337 return(EINVAL);
1338 }
1339
1340 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1341 rf_CopybackThread,
1342 raidPtr,"raid_copyback");
1343 return (retcode);
1344
1345 /* return the percentage completion of reconstruction */
1346 case RAIDFRAME_CHECK_RECON_STATUS:
1347 if (raidPtr->Layout.map->faultsTolerated == 0) {
1348 /* This makes no sense on a RAID 0, so tell the
1349 user it's done. */
1350 *(int *) data = 100;
1351 return(0);
1352 }
1353 row = 0; /* XXX we only consider a single row... */
1354 if (raidPtr->status[row] != rf_rs_reconstructing)
1355 *(int *) data = 100;
1356 else
1357 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1358 return (0);
1359 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1360 progressInfoPtr = (RF_ProgressInfo_t **) data;
1361 row = 0; /* XXX we only consider a single row... */
1362 if (raidPtr->status[row] != rf_rs_reconstructing) {
1363 progressInfo.remaining = 0;
1364 progressInfo.completed = 100;
1365 progressInfo.total = 100;
1366 } else {
1367 progressInfo.total =
1368 raidPtr->reconControl[row]->numRUsTotal;
1369 progressInfo.completed =
1370 raidPtr->reconControl[row]->numRUsComplete;
1371 progressInfo.remaining = progressInfo.total -
1372 progressInfo.completed;
1373 }
1374 retcode = copyout((caddr_t) &progressInfo,
1375 (caddr_t) *progressInfoPtr,
1376 sizeof(RF_ProgressInfo_t));
1377 return (retcode);
1378
1379 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1380 if (raidPtr->Layout.map->faultsTolerated == 0) {
1381 /* This makes no sense on a RAID 0, so tell the
1382 user it's done. */
1383 *(int *) data = 100;
1384 return(0);
1385 }
1386 if (raidPtr->parity_rewrite_in_progress == 1) {
1387 *(int *) data = 100 *
1388 raidPtr->parity_rewrite_stripes_done /
1389 raidPtr->Layout.numStripe;
1390 } else {
1391 *(int *) data = 100;
1392 }
1393 return (0);
1394
1395 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1396 progressInfoPtr = (RF_ProgressInfo_t **) data;
1397 if (raidPtr->parity_rewrite_in_progress == 1) {
1398 progressInfo.total = raidPtr->Layout.numStripe;
1399 progressInfo.completed =
1400 raidPtr->parity_rewrite_stripes_done;
1401 progressInfo.remaining = progressInfo.total -
1402 progressInfo.completed;
1403 } else {
1404 progressInfo.remaining = 0;
1405 progressInfo.completed = 100;
1406 progressInfo.total = 100;
1407 }
1408 retcode = copyout((caddr_t) &progressInfo,
1409 (caddr_t) *progressInfoPtr,
1410 sizeof(RF_ProgressInfo_t));
1411 return (retcode);
1412
1413 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1414 if (raidPtr->Layout.map->faultsTolerated == 0) {
1415 /* This makes no sense on a RAID 0 */
1416 *(int *) data = 100;
1417 return(0);
1418 }
1419 if (raidPtr->copyback_in_progress == 1) {
1420 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1421 raidPtr->Layout.numStripe;
1422 } else {
1423 *(int *) data = 100;
1424 }
1425 return (0);
1426
1427 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1428 progressInfoPtr = (RF_ProgressInfo_t **) data;
1429 if (raidPtr->copyback_in_progress == 1) {
1430 progressInfo.total = raidPtr->Layout.numStripe;
1431 progressInfo.completed =
1432 raidPtr->copyback_stripes_done;
1433 progressInfo.remaining = progressInfo.total -
1434 progressInfo.completed;
1435 } else {
1436 progressInfo.remaining = 0;
1437 progressInfo.completed = 100;
1438 progressInfo.total = 100;
1439 }
1440 retcode = copyout((caddr_t) &progressInfo,
1441 (caddr_t) *progressInfoPtr,
1442 sizeof(RF_ProgressInfo_t));
1443 return (retcode);
1444
1445 /* the sparetable daemon calls this to wait for the kernel to
1446 * need a spare table. this ioctl does not return until a
1447 * spare table is needed. XXX -- calling mpsleep here in the
1448 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1449 * -- I should either compute the spare table in the kernel,
1450 * or have a different -- XXX XXX -- interface (a different
1451 * character device) for delivering the table -- XXX */
1452 #if 0
1453 case RAIDFRAME_SPARET_WAIT:
1454 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1455 while (!rf_sparet_wait_queue)
1456 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1457 waitreq = rf_sparet_wait_queue;
1458 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1459 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1460
1461 /* structure assignment */
1462 *((RF_SparetWait_t *) data) = *waitreq;
1463
1464 RF_Free(waitreq, sizeof(*waitreq));
1465 return (0);
1466
1467 /* wakes up a process waiting on SPARET_WAIT and puts an error
1468 	 * code in it that will cause the daemon to exit */
1469 case RAIDFRAME_ABORT_SPARET_WAIT:
1470 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1471 waitreq->fcol = -1;
1472 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1473 waitreq->next = rf_sparet_wait_queue;
1474 rf_sparet_wait_queue = waitreq;
1475 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1476 wakeup(&rf_sparet_wait_queue);
1477 return (0);
1478
1479 /* used by the spare table daemon to deliver a spare table
1480 * into the kernel */
1481 case RAIDFRAME_SEND_SPARET:
1482
1483 /* install the spare table */
1484 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1485
1486 /* respond to the requestor. the return status of the spare
1487 * table installation is passed in the "fcol" field */
1488 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1489 waitreq->fcol = retcode;
1490 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1491 waitreq->next = rf_sparet_resp_queue;
1492 rf_sparet_resp_queue = waitreq;
1493 wakeup(&rf_sparet_resp_queue);
1494 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1495
1496 return (retcode);
1497 #endif
1498
1499 default:
1500 break; /* fall through to the os-specific code below */
1501
1502 }
1503
1504 if (!raidPtr->valid)
1505 return (EINVAL);
1506
1507 /*
1508 * Add support for "regular" device ioctls here.
1509 */
1510
1511 switch (cmd) {
1512 case DIOCGDINFO:
1513 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1514 break;
1515 #ifdef __HAVE_OLD_DISKLABEL
1516 case ODIOCGDINFO:
1517 newlabel = *(rs->sc_dkdev.dk_label);
1518 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1519 return ENOTTY;
1520 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1521 break;
1522 #endif
1523
1524 case DIOCGPART:
1525 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1526 ((struct partinfo *) data)->part =
1527 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1528 break;
1529
1530 case DIOCWDINFO:
1531 case DIOCSDINFO:
1532 #ifdef __HAVE_OLD_DISKLABEL
1533 case ODIOCWDINFO:
1534 case ODIOCSDINFO:
1535 #endif
1536 {
1537 struct disklabel *lp;
1538 #ifdef __HAVE_OLD_DISKLABEL
1539 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1540 memset(&newlabel, 0, sizeof newlabel);
1541 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1542 lp = &newlabel;
1543 } else
1544 #endif
1545 lp = (struct disklabel *)data;
1546
1547 if ((error = raidlock(rs)) != 0)
1548 return (error);
1549
1550 rs->sc_flags |= RAIDF_LABELLING;
1551
1552 error = setdisklabel(rs->sc_dkdev.dk_label,
1553 lp, 0, rs->sc_dkdev.dk_cpulabel);
1554 if (error == 0) {
1555 if (cmd == DIOCWDINFO
1556 #ifdef __HAVE_OLD_DISKLABEL
1557 || cmd == ODIOCWDINFO
1558 #endif
1559 )
1560 error = writedisklabel(RAIDLABELDEV(dev),
1561 raidstrategy, rs->sc_dkdev.dk_label,
1562 rs->sc_dkdev.dk_cpulabel);
1563 }
1564 rs->sc_flags &= ~RAIDF_LABELLING;
1565
1566 raidunlock(rs);
1567
1568 if (error)
1569 return (error);
1570 break;
1571 }
1572
1573 case DIOCWLABEL:
1574 if (*(int *) data != 0)
1575 rs->sc_flags |= RAIDF_WLABEL;
1576 else
1577 rs->sc_flags &= ~RAIDF_WLABEL;
1578 break;
1579
1580 case DIOCGDEFLABEL:
1581 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1582 break;
1583
1584 #ifdef __HAVE_OLD_DISKLABEL
1585 case ODIOCGDEFLABEL:
1586 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1587 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1588 return ENOTTY;
1589 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1590 break;
1591 #endif
1592
1593 default:
1594 retcode = ENOTTY;
1595 }
1596 return (retcode);
1597
1598 }
1599
1600
1601 /* raidinit -- complete the rest of the initialization for the
1602 RAIDframe device. */
1603
1604
1605 static void
1606 raidinit(raidPtr)
1607 RF_Raid_t *raidPtr;
1608 {
1609 struct raid_softc *rs;
1610 int unit;
1611
1612 unit = raidPtr->raidid;
1613
1614 rs = &raid_softc[unit];
1615
1616 /* XXX should check return code first... */
1617 rs->sc_flags |= RAIDF_INITED;
1618
1619 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1620
1621 rs->sc_dkdev.dk_name = rs->sc_xname;
1622
1623 /* disk_attach actually creates space for the CPU disklabel, among
1624 * other things, so it's critical to call this *BEFORE* we try putzing
1625 * with disklabels. */
1626
1627 disk_attach(&rs->sc_dkdev);
1628
1629 /* XXX There may be a weird interaction here between this, and
1630 * protectedSectors, as used in RAIDframe. */
1631
1632 rs->sc_size = raidPtr->totalSectors;
1633
1634 }
1635 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1636 /* wake up the daemon & tell it to get us a spare table
1637 * XXX
1638 * the entries in the queues should be tagged with the raidPtr
1639 * so that in the extremely rare case that two recons happen at once,
1640  * we know for which device we're requesting a spare table
1641 * XXX
1642 *
1643 * XXX This code is not currently used. GO
1644 */
1645 int
1646 rf_GetSpareTableFromDaemon(req)
1647 RF_SparetWait_t *req;
1648 {
1649 int retcode;
1650
1651 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1652 req->next = rf_sparet_wait_queue;
1653 rf_sparet_wait_queue = req;
1654 wakeup(&rf_sparet_wait_queue);
1655
1656 /* mpsleep unlocks the mutex */
1657 while (!rf_sparet_resp_queue) {
1658 tsleep(&rf_sparet_resp_queue, PRIBIO,
1659 "raidframe getsparetable", 0);
1660 }
1661 req = rf_sparet_resp_queue;
1662 rf_sparet_resp_queue = req->next;
1663 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1664
1665 retcode = req->fcol;
1666 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1667 * alloc'd */
1668 return (retcode);
1669 }
1670 #endif
1671
1672 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1673 * bp & passes it down.
1674  * any calls originating in the kernel must use non-blocking I/O;
1675 * do some extra sanity checking to return "appropriate" error values for
1676 * certain conditions (to make some standard utilities work)
1677 *
1678 * Formerly known as: rf_DoAccessKernel
1679 */
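/*
 * Callers queue the buf first and then kick this routine; raidstrategy()
 * above does, in essence:
 *
 *	s = splbio();
 *	BUFQ_PUT(&rs->buf_queue, bp);
 *	raidstart(raidPtrs[raidID]);
 *	splx(s);
 *
 * raidstart() then drains buf_queue for as long as raidPtr->openings
 * allows, handing each request to rf_DoAccess() as non-blocking I/O.
 */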
1680 void
1681 raidstart(raidPtr)
1682 RF_Raid_t *raidPtr;
1683 {
1684 RF_SectorCount_t num_blocks, pb, sum;
1685 RF_RaidAddr_t raid_addr;
1686 struct partition *pp;
1687 daddr_t blocknum;
1688 int unit;
1689 struct raid_softc *rs;
1690 int do_async;
1691 struct buf *bp;
1692
1693 unit = raidPtr->raidid;
1694 rs = &raid_softc[unit];
1695
1696 /* quick check to see if anything has died recently */
1697 RF_LOCK_MUTEX(raidPtr->mutex);
1698 if (raidPtr->numNewFailures > 0) {
1699 RF_UNLOCK_MUTEX(raidPtr->mutex);
1700 rf_update_component_labels(raidPtr,
1701 RF_NORMAL_COMPONENT_UPDATE);
1702 RF_LOCK_MUTEX(raidPtr->mutex);
1703 raidPtr->numNewFailures--;
1704 }
1705
1706 /* Check to see if we're at the limit... */
1707 while (raidPtr->openings > 0) {
1708 RF_UNLOCK_MUTEX(raidPtr->mutex);
1709
1710 /* get the next item, if any, from the queue */
1711 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1712 /* nothing more to do */
1713 return;
1714 }
1715
1716 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1717 * partition.. Need to make it absolute to the underlying
1718 * device.. */
1719
1720 blocknum = bp->b_blkno;
1721 if (DISKPART(bp->b_dev) != RAW_PART) {
1722 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1723 blocknum += pp->p_offset;
1724 }
1725
1726 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1727 (int) blocknum));
1728
1729 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1730 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1731
1732 /* *THIS* is where we adjust what block we're going to...
1733 * but DO NOT TOUCH bp->b_blkno!!! */
1734 raid_addr = blocknum;
1735
1736 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1737 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1738 sum = raid_addr + num_blocks + pb;
1739 if (1 || rf_debugKernelAccess) {
1740 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1741 (int) raid_addr, (int) sum, (int) num_blocks,
1742 (int) pb, (int) bp->b_resid));
1743 }
1744 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1745 || (sum < num_blocks) || (sum < pb)) {
1746 bp->b_error = ENOSPC;
1747 bp->b_flags |= B_ERROR;
1748 bp->b_resid = bp->b_bcount;
1749 biodone(bp);
1750 RF_LOCK_MUTEX(raidPtr->mutex);
1751 continue;
1752 }
1753 /*
1754 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1755 */
1756
1757 if (bp->b_bcount & raidPtr->sectorMask) {
1758 bp->b_error = EINVAL;
1759 bp->b_flags |= B_ERROR;
1760 bp->b_resid = bp->b_bcount;
1761 biodone(bp);
1762 RF_LOCK_MUTEX(raidPtr->mutex);
1763 continue;
1764
1765 }
1766 db1_printf(("Calling DoAccess..\n"));
1767
1768
1769 RF_LOCK_MUTEX(raidPtr->mutex);
1770 raidPtr->openings--;
1771 RF_UNLOCK_MUTEX(raidPtr->mutex);
1772
1773 /*
1774 * Everything is async.
1775 */
1776 do_async = 1;
1777
1778 disk_busy(&rs->sc_dkdev);
1779
1780 /* XXX we're still at splbio() here... do we *really*
1781 need to be? */
1782
1783 /* don't ever condition on bp->b_flags & B_WRITE.
1784 * always condition on B_READ instead */
1785
1786 bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1787 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1788 do_async, raid_addr, num_blocks,
1789 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1790
1791 if (bp->b_error) {
1792 bp->b_flags |= B_ERROR;
1793 }
1794
1795 RF_LOCK_MUTEX(raidPtr->mutex);
1796 }
1797 RF_UNLOCK_MUTEX(raidPtr->mutex);
1798 }
1799
1800
1801
1802
1803 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1804
1805 int
1806 rf_DispatchKernelIO(queue, req)
1807 RF_DiskQueue_t *queue;
1808 RF_DiskQueueData_t *req;
1809 {
1810 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1811 struct buf *bp;
1812 struct raidbuf *raidbp = NULL;
1813
1814 req->queue = queue;
1815
1816 #if DIAGNOSTIC
1817 if (queue->raidPtr->raidid >= numraid) {
1818 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1819 numraid);
1820 panic("Invalid Unit number in rf_DispatchKernelIO");
1821 }
1822 #endif
1823
1824 bp = req->bp;
1825 #if 1
1826 /* XXX when there is a physical disk failure, someone is passing us a
1827 * buffer that contains old stuff!! Attempt to deal with this problem
1828 * without taking a performance hit... (not sure where the real bug
1829 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1830
1831 if (bp->b_flags & B_ERROR) {
1832 bp->b_flags &= ~B_ERROR;
1833 }
1834 if (bp->b_error != 0) {
1835 bp->b_error = 0;
1836 }
1837 #endif
1838 raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1839
1840 /*
1841 * context for raidiodone
1842 */
1843 raidbp->rf_obp = bp;
1844 raidbp->req = req;
1845
1846 LIST_INIT(&raidbp->rf_buf.b_dep);
1847
1848 switch (req->type) {
1849 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1850 /* XXX need to do something extra here.. */
1851 /* I'm leaving this in, as I've never actually seen it used,
1852 * and I'd like folks to report it... GO */
1853 printf(("WAKEUP CALLED\n"));
1854 queue->numOutstanding++;
1855
1856 /* XXX need to glue the original buffer into this?? */
1857
1858 KernelWakeupFunc(&raidbp->rf_buf);
1859 break;
1860
1861 case RF_IO_TYPE_READ:
1862 case RF_IO_TYPE_WRITE:
1863
1864 if (req->tracerec) {
1865 RF_ETIMER_START(req->tracerec->timer);
1866 }
1867 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1868 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1869 req->sectorOffset, req->numSector,
1870 req->buf, KernelWakeupFunc, (void *) req,
1871 queue->raidPtr->logBytesPerSector, req->b_proc);
1872
1873 if (rf_debugKernelAccess) {
1874 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1875 (long) bp->b_blkno));
1876 }
1877 queue->numOutstanding++;
1878 queue->last_deq_sector = req->sectorOffset;
1879 /* acc wouldn't have been let in if there were any pending
1880 * reqs at any other priority */
1881 queue->curPriority = req->priority;
1882
1883 db1_printf(("Going for %c to unit %d row %d col %d\n",
1884 req->type, queue->raidPtr->raidid,
1885 queue->row, queue->col));
1886 db1_printf(("sector %d count %d (%d bytes) %d\n",
1887 (int) req->sectorOffset, (int) req->numSector,
1888 (int) (req->numSector <<
1889 queue->raidPtr->logBytesPerSector),
1890 (int) queue->raidPtr->logBytesPerSector));
1891 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1892 raidbp->rf_buf.b_vp->v_numoutput++;
1893 }
1894 VOP_STRATEGY(&raidbp->rf_buf);
1895
1896 break;
1897
1898 default:
1899 panic("bad req->type in rf_DispatchKernelIO");
1900 }
1901 db1_printf(("Exiting from DispatchKernelIO\n"));
1902
1903 return (0);
1904 }
1905 /* this is the callback function associated with an I/O invoked from
1906 kernel code.
1907 */
1908 static void
1909 KernelWakeupFunc(vbp)
1910 struct buf *vbp;
1911 {
1912 RF_DiskQueueData_t *req = NULL;
1913 RF_DiskQueue_t *queue;
1914 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1915 struct buf *bp;
1916 int s;
1917
1918 s = splbio();
1919 db1_printf(("recovering the request queue:\n"));
1920 req = raidbp->req;
1921
1922 bp = raidbp->rf_obp;
1923
1924 queue = (RF_DiskQueue_t *) req->queue;
1925
1926 if (raidbp->rf_buf.b_flags & B_ERROR) {
1927 bp->b_flags |= B_ERROR;
1928 bp->b_error = raidbp->rf_buf.b_error ?
1929 raidbp->rf_buf.b_error : EIO;
1930 }
1931
1932 /* XXX methinks this could be wrong... */
1933 #if 1
1934 bp->b_resid = raidbp->rf_buf.b_resid;
1935 #endif
1936
1937 if (req->tracerec) {
1938 RF_ETIMER_STOP(req->tracerec->timer);
1939 RF_ETIMER_EVAL(req->tracerec->timer);
1940 RF_LOCK_MUTEX(rf_tracing_mutex);
1941 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1942 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1943 req->tracerec->num_phys_ios++;
1944 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1945 }
1946 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1947
1948 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1949 * ballistic, and mark the component as hosed... */
1950
1951 if (bp->b_flags & B_ERROR) {
1952 /* Mark the disk as dead */
1953 /* but only mark it once... */
1954 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1955 rf_ds_optimal) {
1956 printf("raid%d: IO Error. Marking %s as failed.\n",
1957 queue->raidPtr->raidid,
1958 queue->raidPtr->Disks[queue->row][queue->col].devname);
1959 queue->raidPtr->Disks[queue->row][queue->col].status =
1960 rf_ds_failed;
1961 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1962 queue->raidPtr->numFailures++;
1963 queue->raidPtr->numNewFailures++;
1964 } else { /* Disk is already dead... */
1965 /* printf("Disk already marked as dead!\n"); */
1966 }
1967
1968 }
1969
1970 pool_put(&raidframe_cbufpool, raidbp);
1971
1972 /* Fill in the error value */
1973
1974 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1975
1976 simple_lock(&queue->raidPtr->iodone_lock);
1977
1978 /* Drop this one on the "finished" queue... */
1979 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1980
1981 /* Let the raidio thread know there is work to be done. */
1982 wakeup(&(queue->raidPtr->iodone));
1983
1984 simple_unlock(&queue->raidPtr->iodone_lock);
1985
1986 splx(s);
1987 }
1988
1989
1990
1991 /*
1992 * initialize a buf structure for doing an I/O in the kernel.
1993 */
1994 static void
1995 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1996 logBytesPerSector, b_proc)
1997 struct buf *bp;
1998 struct vnode *b_vp;
1999 unsigned rw_flag;
2000 dev_t dev;
2001 RF_SectorNum_t startSect;
2002 RF_SectorCount_t numSect;
2003 caddr_t buf;
2004 void (*cbFunc) (struct buf *);
2005 void *cbArg;
2006 int logBytesPerSector;
2007 struct proc *b_proc;
2008 {
2009 /* bp->b_flags = B_PHYS | rw_flag; */
2010 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2011 bp->b_bcount = numSect << logBytesPerSector;
2012 bp->b_bufsize = bp->b_bcount;
2013 bp->b_error = 0;
2014 bp->b_dev = dev;
2015 bp->b_data = buf;
2016 bp->b_blkno = startSect;
2017 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2018 if (bp->b_bcount == 0) {
2019 panic("bp->b_bcount is zero in InitBP!!");
2020 }
2021 bp->b_proc = b_proc;
2022 bp->b_iodone = cbFunc;
2023 bp->b_vp = b_vp;
2024
2025 }
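
/*
 * Mapping summary for InitBP(): startSect -> b_blkno,
 * numSect << logBytesPerSector -> b_bcount and b_bufsize, buf -> b_data,
 * b_vp -> b_vp, and cbFunc -> b_iodone (with B_CALL set), so that biodone()
 * on the component buf calls back into the completion routine
 * (KernelWakeupFunc() when the caller is rf_DispatchKernelIO()).
 */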
2026
2027 static void
2028 raidgetdefaultlabel(raidPtr, rs, lp)
2029 RF_Raid_t *raidPtr;
2030 struct raid_softc *rs;
2031 struct disklabel *lp;
2032 {
2033 memset(lp, 0, sizeof(*lp));
2034
2035 /* fabricate a label... */
2036 lp->d_secperunit = raidPtr->totalSectors;
2037 lp->d_secsize = raidPtr->bytesPerSector;
2038 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2039 lp->d_ntracks = 4 * raidPtr->numCol;
2040 lp->d_ncylinders = raidPtr->totalSectors /
2041 (lp->d_nsectors * lp->d_ntracks);
2042 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2043
2044 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2045 lp->d_type = DTYPE_RAID;
2046 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2047 lp->d_rpm = 3600;
2048 lp->d_interleave = 1;
2049 lp->d_flags = 0;
2050
2051 lp->d_partitions[RAW_PART].p_offset = 0;
2052 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2053 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2054 lp->d_npartitions = RAW_PART + 1;
2055
2056 lp->d_magic = DISKMAGIC;
2057 lp->d_magic2 = DISKMAGIC;
2058 	lp->d_checksum = dkcksum(lp);	/* checksum the label we just built */
2059
2060 }
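
/*
 * Worked example of the fabricated geometry (illustrative numbers only):
 * a 5-column array with 64 data sectors per stripe and 1000000 total
 * sectors gets d_nsectors = 64, d_ntracks = 4 * 5 = 20,
 * d_secpercyl = 20 * 64 = 1280 and d_ncylinders = 1000000 / 1280 = 781.
 */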
2061 /*
2062 * Read the disklabel from the raid device. If one is not present, fake one
2063 * up.
2064 */
2065 static void
2066 raidgetdisklabel(dev)
2067 dev_t dev;
2068 {
2069 int unit = raidunit(dev);
2070 struct raid_softc *rs = &raid_softc[unit];
2071 char *errstring;
2072 struct disklabel *lp = rs->sc_dkdev.dk_label;
2073 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2074 RF_Raid_t *raidPtr;
2075
2076 db1_printf(("Getting the disklabel...\n"));
2077
2078 memset(clp, 0, sizeof(*clp));
2079
2080 raidPtr = raidPtrs[unit];
2081
2082 raidgetdefaultlabel(raidPtr, rs, lp);
2083
2084 /*
2085 * Call the generic disklabel extraction routine.
2086 */
2087 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2088 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2089 if (errstring)
2090 raidmakedisklabel(rs);
2091 else {
2092 int i;
2093 struct partition *pp;
2094
2095 /*
2096 * Sanity check whether the found disklabel is valid.
2097 *
2098 		 * This is necessary since the total size of the raid device
2099 		 * may vary when the interleave is changed even though exactly
2100 		 * the same components are used, and an old disklabel may be
2101 		 * used if one is found.
2102 */
2103 if (lp->d_secperunit != rs->sc_size)
2104 printf("raid%d: WARNING: %s: "
2105 "total sector size in disklabel (%d) != "
2106 "the size of raid (%ld)\n", unit, rs->sc_xname,
2107 lp->d_secperunit, (long) rs->sc_size);
2108 for (i = 0; i < lp->d_npartitions; i++) {
2109 pp = &lp->d_partitions[i];
2110 if (pp->p_offset + pp->p_size > rs->sc_size)
2111 printf("raid%d: WARNING: %s: end of partition `%c' "
2112 "exceeds the size of raid (%ld)\n",
2113 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2114 }
2115 }
2116
2117 }
2118 /*
2119 * Take care of things one might want to take care of in the event
2120 * that a disklabel isn't present.
2121 */
2122 static void
2123 raidmakedisklabel(rs)
2124 struct raid_softc *rs;
2125 {
2126 struct disklabel *lp = rs->sc_dkdev.dk_label;
2127 db1_printf(("Making a label..\n"));
2128
2129 /*
2130 * For historical reasons, if there's no disklabel present
2131 * the raw partition must be marked FS_BSDFFS.
2132 */
2133
2134 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2135
2136 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2137
2138 lp->d_checksum = dkcksum(lp);
2139 }
2140 /*
2141 * Lookup the provided name in the filesystem. If the file exists,
2142 * is a valid block device, and isn't being used by anyone else,
2143 * set *vpp to the file's vnode.
2144 * You'll find the original of this in ccd.c
2145 */
2146 int
2147 raidlookup(path, p, vpp)
2148 char *path;
2149 struct proc *p;
2150 struct vnode **vpp; /* result */
2151 {
2152 struct nameidata nd;
2153 struct vnode *vp;
2154 struct vattr va;
2155 int error;
2156
2157 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2158 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2159 return (error);
2160 }
2161 vp = nd.ni_vp;
2162 if (vp->v_usecount > 1) {
2163 VOP_UNLOCK(vp, 0);
2164 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2165 return (EBUSY);
2166 }
2167 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2168 VOP_UNLOCK(vp, 0);
2169 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2170 return (error);
2171 }
2172 /* XXX: eventually we should handle VREG, too. */
2173 if (va.va_type != VBLK) {
2174 VOP_UNLOCK(vp, 0);
2175 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2176 return (ENOTBLK);
2177 }
2178 VOP_UNLOCK(vp, 0);
2179 *vpp = vp;
2180 return (0);
2181 }
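
/*
 * Illustrative sketch only (hypothetical helper, hypothetical device path):
 * the usual open/use/close pattern around raidlookup().
 */
#if 0
static int
example_open_close_component(struct proc *p)
{
	struct vnode *vp;
	int error;

	if ((error = raidlookup("/dev/sd0e", p, &vp)) != 0)
		return (error);
	/* ... use the component ... */
	return (vn_close(vp, FREAD | FWRITE, p->p_ucred, p));
}
#endif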
2182 /*
2183 * Wait interruptibly for an exclusive lock.
2184 *
2185 * XXX
2186 * Several drivers do this; it should be abstracted and made MP-safe.
2187 * (Hmm... where have we seen this warning before :-> GO )
2188 */
2189 static int
2190 raidlock(rs)
2191 struct raid_softc *rs;
2192 {
2193 int error;
2194
2195 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2196 rs->sc_flags |= RAIDF_WANTED;
2197 if ((error =
2198 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2199 return (error);
2200 }
2201 rs->sc_flags |= RAIDF_LOCKED;
2202 return (0);
2203 }
2204 /*
2205 * Unlock and wake up any waiters.
2206 */
2207 static void
2208 raidunlock(rs)
2209 struct raid_softc *rs;
2210 {
2211
2212 rs->sc_flags &= ~RAIDF_LOCKED;
2213 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2214 rs->sc_flags &= ~RAIDF_WANTED;
2215 wakeup(rs);
2216 }
2217 }
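
/*
 * Illustrative sketch only (hypothetical helper): the usual pattern for
 * code that needs the softc held exclusively, e.g. open/close or
 * configuration changes.
 */
#if 0
static int
example_with_raidlock(struct raid_softc *rs)
{
	int error;

	if ((error = raidlock(rs)) != 0)
		return (error);
	/* ... manipulate rs while RAIDF_LOCKED is held ... */
	raidunlock(rs);
	return (0);
}
#endif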
2218
2219
2220 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2221 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
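
/*
 * Layout note: with the standard 512-byte DEV_BSIZE the component label
 * starts at block 16384 / 512 = 32 of each component and occupies
 * 1024 / 512 = 2 blocks, which is why raidread_component_label() and
 * raidwrite_component_label() below set b_blkno to
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE.
 */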
2222
2223 int
2224 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2225 {
2226 RF_ComponentLabel_t clabel;
2227 raidread_component_label(dev, b_vp, &clabel);
2228 clabel.mod_counter = mod_counter;
2229 clabel.clean = RF_RAID_CLEAN;
2230 raidwrite_component_label(dev, b_vp, &clabel);
2231 return(0);
2232 }
2233
2234
2235 int
2236 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2237 {
2238 RF_ComponentLabel_t clabel;
2239 raidread_component_label(dev, b_vp, &clabel);
2240 clabel.mod_counter = mod_counter;
2241 clabel.clean = RF_RAID_DIRTY;
2242 raidwrite_component_label(dev, b_vp, &clabel);
2243 return(0);
2244 }
2245
2246 /* ARGSUSED */
2247 int
2248 raidread_component_label(dev, b_vp, clabel)
2249 dev_t dev;
2250 struct vnode *b_vp;
2251 RF_ComponentLabel_t *clabel;
2252 {
2253 struct buf *bp;
2254 const struct bdevsw *bdev;
2255 int error;
2256
2257 /* XXX should probably ensure that we don't try to do this if
2258 someone has changed rf_protected_sectors. */
2259
2260 if (b_vp == NULL) {
2261 /* For whatever reason, this component is not valid.
2262 Don't try to read a component label from it. */
2263 return(EINVAL);
2264 }
2265
2266 /* get a block of the appropriate size... */
2267 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2268 bp->b_dev = dev;
2269
2270 /* get our ducks in a row for the read */
2271 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2272 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2273 bp->b_flags |= B_READ;
2274 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2275
2276 	bdev = bdevsw_lookup(bp->b_dev);
2277 	if (bdev == NULL) {
		brelse(bp);	/* don't leak the buf from geteblk() */
2278 		return (ENXIO);
	}
2279 	(*bdev->d_strategy)(bp);
2280
2281 error = biowait(bp);
2282
2283 if (!error) {
2284 memcpy(clabel, bp->b_data,
2285 sizeof(RF_ComponentLabel_t));
2286 }
2287
2288 brelse(bp);
2289 return(error);
2290 }
2291 /* ARGSUSED */
2292 int
2293 raidwrite_component_label(dev, b_vp, clabel)
2294 dev_t dev;
2295 struct vnode *b_vp;
2296 RF_ComponentLabel_t *clabel;
2297 {
2298 struct buf *bp;
2299 const struct bdevsw *bdev;
2300 int error;
2301
2302 /* get a block of the appropriate size... */
2303 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2304 bp->b_dev = dev;
2305
2306 /* get our ducks in a row for the write */
2307 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2308 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2309 bp->b_flags |= B_WRITE;
2310 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2311
2312 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2313
2314 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2315
2316 	bdev = bdevsw_lookup(bp->b_dev);
2317 	if (bdev == NULL) {
		brelse(bp);	/* don't leak the buf from geteblk() */
2318 		return (ENXIO);
	}
2319 	(*bdev->d_strategy)(bp);
2320 error = biowait(bp);
2321 brelse(bp);
2322 if (error) {
2323 #if 1
2324 printf("Failed to write RAID component info!\n");
2325 #endif
2326 }
2327
2328 return(error);
2329 }
2330
2331 void
2332 rf_markalldirty(raidPtr)
2333 RF_Raid_t *raidPtr;
2334 {
2335 RF_ComponentLabel_t clabel;
2336 int sparecol;
2337 int r,c;
2338 int i,j;
2339 int srow, scol;
2340
2341 raidPtr->mod_counter++;
2342 for (r = 0; r < raidPtr->numRow; r++) {
2343 for (c = 0; c < raidPtr->numCol; c++) {
2344 /* we don't want to touch (at all) a disk that has
2345 failed */
2346 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2347 raidread_component_label(
2348 raidPtr->Disks[r][c].dev,
2349 raidPtr->raid_cinfo[r][c].ci_vp,
2350 &clabel);
2351 if (clabel.status == rf_ds_spared) {
2352 /* XXX do something special...
2353 but whatever you do, don't
2354 try to access it!! */
2355 } else {
2356 raidmarkdirty(
2357 raidPtr->Disks[r][c].dev,
2358 raidPtr->raid_cinfo[r][c].ci_vp,
2359 raidPtr->mod_counter);
2360 }
2361 }
2362 }
2363 }
2364
2365 for( c = 0; c < raidPtr->numSpare ; c++) {
2366 sparecol = raidPtr->numCol + c;
2367 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2368 /*
2369
2370 we claim this disk is "optimal" if it's
2371 rf_ds_used_spare, as that means it should be
2372 directly substitutable for the disk it replaced.
2373 We note that too...
2374
2375 */
2376
2377 for(i=0;i<raidPtr->numRow;i++) {
2378 for(j=0;j<raidPtr->numCol;j++) {
2379 if ((raidPtr->Disks[i][j].spareRow ==
2380 0) &&
2381 (raidPtr->Disks[i][j].spareCol ==
2382 sparecol)) {
2383 srow = i;
2384 scol = j;
2385 break;
2386 }
2387 }
2388 }
2389
2390 raidread_component_label(
2391 raidPtr->Disks[0][sparecol].dev,
2392 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2393 &clabel);
2394 /* make sure status is noted */
2395
2396 raid_init_component_label(raidPtr, &clabel);
2397
2398 clabel.row = srow;
2399 clabel.column = scol;
2400 /* Note: we *don't* change status from rf_ds_used_spare
2401 to rf_ds_optimal */
2402 /* clabel.status = rf_ds_optimal; */
2403
2404 raidmarkdirty(raidPtr->Disks[0][sparecol].dev,
2405 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2406 raidPtr->mod_counter);
2407 }
2408 }
2409 }
2410
2411
2412 void
2413 rf_update_component_labels(raidPtr, final)
2414 RF_Raid_t *raidPtr;
2415 int final;
2416 {
2417 RF_ComponentLabel_t clabel;
2418 int sparecol;
2419 int r,c;
2420 int i,j;
2421 int srow, scol;
2422
2423 srow = -1;
2424 scol = -1;
2425
2426 /* XXX should do extra checks to make sure things really are clean,
2427 rather than blindly setting the clean bit... */
2428
2429 raidPtr->mod_counter++;
2430
2431 for (r = 0; r < raidPtr->numRow; r++) {
2432 for (c = 0; c < raidPtr->numCol; c++) {
2433 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2434 raidread_component_label(
2435 raidPtr->Disks[r][c].dev,
2436 raidPtr->raid_cinfo[r][c].ci_vp,
2437 &clabel);
2438 /* make sure status is noted */
2439 clabel.status = rf_ds_optimal;
2440 /* bump the counter */
2441 clabel.mod_counter = raidPtr->mod_counter;
2442
2443 raidwrite_component_label(
2444 raidPtr->Disks[r][c].dev,
2445 raidPtr->raid_cinfo[r][c].ci_vp,
2446 &clabel);
2447 if (final == RF_FINAL_COMPONENT_UPDATE) {
2448 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2449 raidmarkclean(
2450 raidPtr->Disks[r][c].dev,
2451 raidPtr->raid_cinfo[r][c].ci_vp,
2452 raidPtr->mod_counter);
2453 }
2454 }
2455 }
2456 /* else we don't touch it.. */
2457 }
2458 }
2459
2460 for( c = 0; c < raidPtr->numSpare ; c++) {
2461 sparecol = raidPtr->numCol + c;
2462 /* Need to ensure that the reconstruct actually completed! */
2463 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2464 /*
2465
2466 we claim this disk is "optimal" if it's
2467 rf_ds_used_spare, as that means it should be
2468 directly substitutable for the disk it replaced.
2469 We note that too...
2470
2471 */
2472
2473 for(i=0;i<raidPtr->numRow;i++) {
2474 for(j=0;j<raidPtr->numCol;j++) {
2475 if ((raidPtr->Disks[i][j].spareRow ==
2476 0) &&
2477 (raidPtr->Disks[i][j].spareCol ==
2478 sparecol)) {
2479 srow = i;
2480 scol = j;
2481 break;
2482 }
2483 }
2484 }
2485
2486 /* XXX shouldn't *really* need this... */
2487 raidread_component_label(
2488 raidPtr->Disks[0][sparecol].dev,
2489 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2490 &clabel);
2491 /* make sure status is noted */
2492
2493 raid_init_component_label(raidPtr, &clabel);
2494
2495 clabel.mod_counter = raidPtr->mod_counter;
2496 clabel.row = srow;
2497 clabel.column = scol;
2498 clabel.status = rf_ds_optimal;
2499
2500 raidwrite_component_label(
2501 raidPtr->Disks[0][sparecol].dev,
2502 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2503 &clabel);
2504 if (final == RF_FINAL_COMPONENT_UPDATE) {
2505 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2506 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2507 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2508 raidPtr->mod_counter);
2509 }
2510 }
2511 }
2512 }
2513 }
2514
2515 void
2516 rf_close_component(raidPtr, vp, auto_configured)
2517 RF_Raid_t *raidPtr;
2518 struct vnode *vp;
2519 int auto_configured;
2520 {
2521 struct proc *p;
2522
2523 p = raidPtr->engine_thread;
2524
2525 if (vp != NULL) {
2526 if (auto_configured == 1) {
2527 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2528 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2529 vput(vp);
2530
2531 } else {
2532 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2533 }
2534 }
2535 }
2536
2537
2538 void
2539 rf_UnconfigureVnodes(raidPtr)
2540 RF_Raid_t *raidPtr;
2541 {
2542 int r,c;
2543 struct vnode *vp;
2544 int acd;
2545
2546
2547 /* We take this opportunity to close the vnodes like we should.. */
2548
2549 for (r = 0; r < raidPtr->numRow; r++) {
2550 for (c = 0; c < raidPtr->numCol; c++) {
2551 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2552 acd = raidPtr->Disks[r][c].auto_configured;
2553 rf_close_component(raidPtr, vp, acd);
2554 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2555 raidPtr->Disks[r][c].auto_configured = 0;
2556 }
2557 }
2558 for (r = 0; r < raidPtr->numSpare; r++) {
2559 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2560 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2561 rf_close_component(raidPtr, vp, acd);
2562 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2563 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2564 }
2565 }
2566
2567
2568 void
2569 rf_ReconThread(req)
2570 struct rf_recon_req *req;
2571 {
2572 int s;
2573 RF_Raid_t *raidPtr;
2574
2575 s = splbio();
2576 raidPtr = (RF_Raid_t *) req->raidPtr;
2577 raidPtr->recon_in_progress = 1;
2578
2579 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2580 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2581
2582 /* XXX get rid of this! we don't need it at all.. */
2583 RF_Free(req, sizeof(*req));
2584
2585 raidPtr->recon_in_progress = 0;
2586 splx(s);
2587
2588 /* That's all... */
2589 kthread_exit(0); /* does not return */
2590 }
2591
2592 void
2593 rf_RewriteParityThread(raidPtr)
2594 RF_Raid_t *raidPtr;
2595 {
2596 int retcode;
2597 int s;
2598
2599 raidPtr->parity_rewrite_in_progress = 1;
2600 s = splbio();
2601 retcode = rf_RewriteParity(raidPtr);
2602 splx(s);
2603 if (retcode) {
2604 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2605 } else {
2606 /* set the clean bit! If we shutdown correctly,
2607 the clean bit on each component label will get
2608 set */
2609 raidPtr->parity_good = RF_RAID_CLEAN;
2610 }
2611 raidPtr->parity_rewrite_in_progress = 0;
2612
2613 /* Anyone waiting for us to stop? If so, inform them... */
2614 if (raidPtr->waitShutdown) {
2615 wakeup(&raidPtr->parity_rewrite_in_progress);
2616 }
2617
2618 /* That's all... */
2619 kthread_exit(0); /* does not return */
2620 }
2621
2622
2623 void
2624 rf_CopybackThread(raidPtr)
2625 RF_Raid_t *raidPtr;
2626 {
2627 int s;
2628
2629 raidPtr->copyback_in_progress = 1;
2630 s = splbio();
2631 rf_CopybackReconstructedData(raidPtr);
2632 splx(s);
2633 raidPtr->copyback_in_progress = 0;
2634
2635 /* That's all... */
2636 kthread_exit(0); /* does not return */
2637 }
2638
2639
2640 void
2641 rf_ReconstructInPlaceThread(req)
2642 struct rf_recon_req *req;
2643 {
2644 int retcode;
2645 int s;
2646 RF_Raid_t *raidPtr;
2647
2648 s = splbio();
2649 raidPtr = req->raidPtr;
2650 raidPtr->recon_in_progress = 1;
2651 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2652 RF_Free(req, sizeof(*req));
2653 raidPtr->recon_in_progress = 0;
2654 splx(s);
2655
2656 /* That's all... */
2657 kthread_exit(0); /* does not return */
2658 }
2659
2660 RF_AutoConfig_t *
2661 rf_find_raid_components()
2662 {
2663 struct vnode *vp;
2664 struct disklabel label;
2665 struct device *dv;
2666 dev_t dev;
2667 int bmajor;
2668 int error;
2669 int i;
2670 int good_one;
2671 RF_ComponentLabel_t *clabel;
2672 RF_AutoConfig_t *ac_list;
2673 RF_AutoConfig_t *ac;
2674
2675
2676 /* initialize the AutoConfig list */
2677 ac_list = NULL;
2678
2679 /* we begin by trolling through *all* the devices on the system */
2680
2681 for (dv = alldevs.tqh_first; dv != NULL;
2682 dv = dv->dv_list.tqe_next) {
2683
2684 /* we are only interested in disks... */
2685 if (dv->dv_class != DV_DISK)
2686 continue;
2687
2688 /* we don't care about floppies... */
2689 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2690 continue;
2691 }
2692
2693 /* we don't care about CD's... */
2694 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2695 continue;
2696 }
2697
2698 /* hdfd is the Atari/Hades floppy driver */
2699 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2700 continue;
2701 }
2702 /* fdisa is the Atari/Milan floppy driver */
2703 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2704 continue;
2705 }
2706
2707 		/* look up the block-device major number for this device */
2708 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2709
2710 /* get a vnode for the raw partition of this disk */
2711
2712 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2713 if (bdevvp(dev, &vp))
2714 panic("RAID can't alloc vnode");
2715
2716 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2717
2718 if (error) {
2719 /* "Who cares." Continue looking
2720 for something that exists*/
2721 vput(vp);
2722 continue;
2723 }
2724
2725 /* Ok, the disk exists. Go get the disklabel. */
2726 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2727 FREAD, NOCRED, 0);
2728 if (error) {
2729 /*
2730 * XXX can't happen - open() would
2731 * have errored out (or faked up one)
2732 */
2733 printf("can't get label for dev %s%c (%d)!?!?\n",
2734 dv->dv_xname, 'a' + RAW_PART, error);
2735 }
2736
2737 /* don't need this any more. We'll allocate it again
2738 a little later if we really do... */
2739 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2740 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2741 vput(vp);
2742
2743 for (i=0; i < label.d_npartitions; i++) {
2744 /* We only support partitions marked as RAID */
2745 if (label.d_partitions[i].p_fstype != FS_RAID)
2746 continue;
2747
2748 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2749 if (bdevvp(dev, &vp))
2750 panic("RAID can't alloc vnode");
2751
2752 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2753 if (error) {
2754 /* Whatever... */
2755 vput(vp);
2756 continue;
2757 }
2758
2759 good_one = 0;
2760
2761 clabel = (RF_ComponentLabel_t *)
2762 malloc(sizeof(RF_ComponentLabel_t),
2763 M_RAIDFRAME, M_NOWAIT);
2764 if (clabel == NULL) {
2765 /* XXX CLEANUP HERE */
2766 printf("RAID auto config: out of memory!\n");
2767 return(NULL); /* XXX probably should panic? */
2768 }
2769
2770 if (!raidread_component_label(dev, vp, clabel)) {
2771 /* Got the label. Does it look reasonable? */
2772 if (rf_reasonable_label(clabel) &&
2773 (clabel->partitionSize <=
2774 label.d_partitions[i].p_size)) {
2775 #if DEBUG
2776 printf("Component on: %s%c: %d\n",
2777 dv->dv_xname, 'a'+i,
2778 label.d_partitions[i].p_size);
2779 rf_print_component_label(clabel);
2780 #endif
2781 /* if it's reasonable, add it,
2782 else ignore it. */
2783 ac = (RF_AutoConfig_t *)
2784 malloc(sizeof(RF_AutoConfig_t),
2785 M_RAIDFRAME,
2786 M_NOWAIT);
2787 if (ac == NULL) {
2788 /* XXX should panic?? */
2789 return(NULL);
2790 }
2791
2792 sprintf(ac->devname, "%s%c",
2793 dv->dv_xname, 'a'+i);
2794 ac->dev = dev;
2795 ac->vp = vp;
2796 ac->clabel = clabel;
2797 ac->next = ac_list;
2798 ac_list = ac;
2799 good_one = 1;
2800 }
2801 }
2802 if (!good_one) {
2803 /* cleanup */
2804 free(clabel, M_RAIDFRAME);
2805 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2806 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2807 vput(vp);
2808 }
2809 }
2810 }
2811 return(ac_list);
2812 }
2813
2814 static int
2815 rf_reasonable_label(clabel)
2816 RF_ComponentLabel_t *clabel;
2817 {
2818
2819 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2820 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2821 ((clabel->clean == RF_RAID_CLEAN) ||
2822 (clabel->clean == RF_RAID_DIRTY)) &&
2823 clabel->row >=0 &&
2824 clabel->column >= 0 &&
2825 clabel->num_rows > 0 &&
2826 clabel->num_columns > 0 &&
2827 clabel->row < clabel->num_rows &&
2828 clabel->column < clabel->num_columns &&
2829 clabel->blockSize > 0 &&
2830 clabel->numBlocks > 0) {
2831 /* label looks reasonable enough... */
2832 return(1);
2833 }
2834 return(0);
2835 }
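
/*
 * Illustrative sketch only (hypothetical helper): a component label should
 * only be trusted after it has been read successfully *and* passes
 * rf_reasonable_label(), which is the pattern rf_find_raid_components()
 * uses above.
 */
#if 0
static int
example_label_usable(dev_t dev, struct vnode *vp, RF_ComponentLabel_t *clabel)
{
	if (raidread_component_label(dev, vp, clabel) != 0)
		return (0);		/* couldn't read it */
	return (rf_reasonable_label(clabel));
}
#endif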
2836
2837
2838 #if DEBUG
2839 void
2840 rf_print_component_label(clabel)
2841 RF_ComponentLabel_t *clabel;
2842 {
2843 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2844 clabel->row, clabel->column,
2845 clabel->num_rows, clabel->num_columns);
2846 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2847 clabel->version, clabel->serial_number,
2848 clabel->mod_counter);
2849 printf(" Clean: %s Status: %d\n",
2850 clabel->clean ? "Yes" : "No", clabel->status );
2851 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2852 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2853 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2854 (char) clabel->parityConfig, clabel->blockSize,
2855 clabel->numBlocks);
2856 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2857 printf(" Contains root partition: %s\n",
2858 clabel->root_partition ? "Yes" : "No" );
2859 printf(" Last configured as: raid%d\n", clabel->last_unit );
2860 #if 0
2861 printf(" Config order: %d\n", clabel->config_order);
2862 #endif
2863
2864 }
2865 #endif
2866
2867 RF_ConfigSet_t *
2868 rf_create_auto_sets(ac_list)
2869 RF_AutoConfig_t *ac_list;
2870 {
2871 RF_AutoConfig_t *ac;
2872 RF_ConfigSet_t *config_sets;
2873 RF_ConfigSet_t *cset;
2874 RF_AutoConfig_t *ac_next;
2875
2876
2877 config_sets = NULL;
2878
2879 /* Go through the AutoConfig list, and figure out which components
2880 belong to what sets. */
2881 ac = ac_list;
2882 while(ac!=NULL) {
2883 /* we're going to putz with ac->next, so save it here
2884 for use at the end of the loop */
2885 ac_next = ac->next;
2886
2887 if (config_sets == NULL) {
2888 /* will need at least this one... */
2889 config_sets = (RF_ConfigSet_t *)
2890 malloc(sizeof(RF_ConfigSet_t),
2891 M_RAIDFRAME, M_NOWAIT);
2892 if (config_sets == NULL) {
2893 panic("rf_create_auto_sets: No memory!");
2894 }
2895 /* this one is easy :) */
2896 config_sets->ac = ac;
2897 config_sets->next = NULL;
2898 config_sets->rootable = 0;
2899 ac->next = NULL;
2900 } else {
2901 /* which set does this component fit into? */
2902 cset = config_sets;
2903 while(cset!=NULL) {
2904 if (rf_does_it_fit(cset, ac)) {
2905 /* looks like it matches... */
2906 ac->next = cset->ac;
2907 cset->ac = ac;
2908 break;
2909 }
2910 cset = cset->next;
2911 }
2912 if (cset==NULL) {
2913 /* didn't find a match above... new set..*/
2914 cset = (RF_ConfigSet_t *)
2915 malloc(sizeof(RF_ConfigSet_t),
2916 M_RAIDFRAME, M_NOWAIT);
2917 if (cset == NULL) {
2918 panic("rf_create_auto_sets: No memory!");
2919 }
2920 cset->ac = ac;
2921 ac->next = NULL;
2922 cset->next = config_sets;
2923 cset->rootable = 0;
2924 config_sets = cset;
2925 }
2926 }
2927 ac = ac_next;
2928 }
2929
2930
2931 return(config_sets);
2932 }
2933
2934 static int
2935 rf_does_it_fit(cset, ac)
2936 RF_ConfigSet_t *cset;
2937 RF_AutoConfig_t *ac;
2938 {
2939 RF_ComponentLabel_t *clabel1, *clabel2;
2940
2941 /* If this one matches the *first* one in the set, that's good
2942 enough, since the other members of the set would have been
2943 through here too... */
2944 /* note that we are not checking partitionSize here..
2945
2946 Note that we are also not checking the mod_counters here.
2947 	   If everything else matches except the mod_counter, that's
2948 good enough for this test. We will deal with the mod_counters
2949 a little later in the autoconfiguration process.
2950
2951 (clabel1->mod_counter == clabel2->mod_counter) &&
2952
2953 The reason we don't check for this is that failed disks
2954 will have lower modification counts. If those disks are
2955 not added to the set they used to belong to, then they will
2956 form their own set, which may result in 2 different sets,
2957 for example, competing to be configured at raid0, and
2958 perhaps competing to be the root filesystem set. If the
2959 wrong ones get configured, or both attempt to become /,
2960 	   weird behaviour and/or serious lossage will occur.  Thus we
2961 need to bring them into the fold here, and kick them out at
2962 a later point.
2963
2964 */
2965
2966 clabel1 = cset->ac->clabel;
2967 clabel2 = ac->clabel;
2968 if ((clabel1->version == clabel2->version) &&
2969 (clabel1->serial_number == clabel2->serial_number) &&
2970 (clabel1->num_rows == clabel2->num_rows) &&
2971 (clabel1->num_columns == clabel2->num_columns) &&
2972 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2973 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2974 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2975 (clabel1->parityConfig == clabel2->parityConfig) &&
2976 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2977 (clabel1->blockSize == clabel2->blockSize) &&
2978 (clabel1->numBlocks == clabel2->numBlocks) &&
2979 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2980 (clabel1->root_partition == clabel2->root_partition) &&
2981 (clabel1->last_unit == clabel2->last_unit) &&
2982 (clabel1->config_order == clabel2->config_order)) {
2983 		/* if it gets here, it almost *has* to be a match */
2984 } else {
2985 /* it's not consistent with somebody in the set..
2986 punt */
2987 return(0);
2988 }
2989 /* all was fine.. it must fit... */
2990 return(1);
2991 }
2992
2993 int
2994 rf_have_enough_components(cset)
2995 RF_ConfigSet_t *cset;
2996 {
2997 RF_AutoConfig_t *ac;
2998 RF_AutoConfig_t *auto_config;
2999 RF_ComponentLabel_t *clabel;
3000 int r,c;
3001 int num_rows;
3002 int num_cols;
3003 int num_missing;
3004 int mod_counter;
3005 int mod_counter_found;
3006 int even_pair_failed;
3007 char parity_type;
3008
3009
3010 /* check to see that we have enough 'live' components
3011 of this set. If so, we can configure it if necessary */
3012
3013 num_rows = cset->ac->clabel->num_rows;
3014 num_cols = cset->ac->clabel->num_columns;
3015 parity_type = cset->ac->clabel->parityConfig;
3016
3017 /* XXX Check for duplicate components!?!?!? */
3018
3019 /* Determine what the mod_counter is supposed to be for this set. */
3020
3021 mod_counter_found = 0;
3022 mod_counter = 0;
3023 ac = cset->ac;
3024 while(ac!=NULL) {
3025 if (mod_counter_found==0) {
3026 mod_counter = ac->clabel->mod_counter;
3027 mod_counter_found = 1;
3028 } else {
3029 if (ac->clabel->mod_counter > mod_counter) {
3030 mod_counter = ac->clabel->mod_counter;
3031 }
3032 }
3033 ac = ac->next;
3034 }
3035
3036 num_missing = 0;
3037 auto_config = cset->ac;
3038
3039 for(r=0; r<num_rows; r++) {
3040 even_pair_failed = 0;
3041 for(c=0; c<num_cols; c++) {
3042 ac = auto_config;
3043 while(ac!=NULL) {
3044 if ((ac->clabel->row == r) &&
3045 (ac->clabel->column == c) &&
3046 (ac->clabel->mod_counter == mod_counter)) {
3047 /* it's this one... */
3048 #if DEBUG
3049 printf("Found: %s at %d,%d\n",
3050 ac->devname,r,c);
3051 #endif
3052 break;
3053 }
3054 ac=ac->next;
3055 }
3056 if (ac==NULL) {
3057 /* Didn't find one here! */
3058 /* special case for RAID 1, especially
3059 where there are more than 2
3060 components (where RAIDframe treats
3061 things a little differently :( ) */
3062 if (parity_type == '1') {
3063 if (c%2 == 0) { /* even component */
3064 even_pair_failed = 1;
3065 } else { /* odd component. If
3066 we're failed, and
3067 so is the even
3068 component, it's
3069 "Good Night, Charlie" */
3070 if (even_pair_failed == 1) {
3071 return(0);
3072 }
3073 }
3074 } else {
3075 /* normal accounting */
3076 num_missing++;
3077 }
3078 }
3079 if ((parity_type == '1') && (c%2 == 1)) {
3080 /* Just did an even component, and we didn't
3081 bail.. reset the even_pair_failed flag,
3082 and go on to the next component.... */
3083 even_pair_failed = 0;
3084 }
3085 }
3086 }
3087
3088 clabel = cset->ac->clabel;
3089
3090 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3091 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3092 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3093 /* XXX this needs to be made *much* more general */
3094 /* Too many failures */
3095 return(0);
3096 }
3097 /* otherwise, all is well, and we've got enough to take a kick
3098 at autoconfiguring this set */
3099 return(1);
3100 }
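
/*
 * Worked example of the RAID 1 special case above: with num_cols = 4 the
 * mirror pairs are columns (0,1) and (2,3).  Losing components 0 and 2
 * still leaves one member of each pair, so the set remains configurable;
 * losing components 0 and 1 (both members of one pair) makes the loop
 * return 0 immediately.
 */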
3101
3102 void
3103 rf_create_configuration(ac,config,raidPtr)
3104 RF_AutoConfig_t *ac;
3105 RF_Config_t *config;
3106 RF_Raid_t *raidPtr;
3107 {
3108 RF_ComponentLabel_t *clabel;
3109 int i;
3110
3111 clabel = ac->clabel;
3112
3113 /* 1. Fill in the common stuff */
3114 config->numRow = clabel->num_rows;
3115 config->numCol = clabel->num_columns;
3116 config->numSpare = 0; /* XXX should this be set here? */
3117 config->sectPerSU = clabel->sectPerSU;
3118 config->SUsPerPU = clabel->SUsPerPU;
3119 config->SUsPerRU = clabel->SUsPerRU;
3120 config->parityConfig = clabel->parityConfig;
3121 /* XXX... */
3122 strcpy(config->diskQueueType,"fifo");
3123 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3124 config->layoutSpecificSize = 0; /* XXX ?? */
3125
3126 while(ac!=NULL) {
3127 		/* row/col values will be in range due to the checks
3128 		   in rf_reasonable_label() */
3129 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3130 ac->devname);
3131 ac = ac->next;
3132 }
3133
3134 for(i=0;i<RF_MAXDBGV;i++) {
3135 		config->debugVars[i][0] = '\0';
3136 }
3137 }
3138
3139 int
3140 rf_set_autoconfig(raidPtr, new_value)
3141 RF_Raid_t *raidPtr;
3142 int new_value;
3143 {
3144 RF_ComponentLabel_t clabel;
3145 struct vnode *vp;
3146 dev_t dev;
3147 int row, column;
3148 int sparecol;
3149
3150 raidPtr->autoconfigure = new_value;
3151 for(row=0; row<raidPtr->numRow; row++) {
3152 for(column=0; column<raidPtr->numCol; column++) {
3153 if (raidPtr->Disks[row][column].status ==
3154 rf_ds_optimal) {
3155 dev = raidPtr->Disks[row][column].dev;
3156 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3157 raidread_component_label(dev, vp, &clabel);
3158 clabel.autoconfigure = new_value;
3159 raidwrite_component_label(dev, vp, &clabel);
3160 }
3161 }
3162 }
3163 for(column = 0; column < raidPtr->numSpare ; column++) {
3164 sparecol = raidPtr->numCol + column;
3165 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
3166 dev = raidPtr->Disks[0][sparecol].dev;
3167 vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
3168 raidread_component_label(dev, vp, &clabel);
3169 clabel.autoconfigure = new_value;
3170 raidwrite_component_label(dev, vp, &clabel);
3171 }
3172 }
3173 return(new_value);
3174 }
3175
3176 int
3177 rf_set_rootpartition(raidPtr, new_value)
3178 RF_Raid_t *raidPtr;
3179 int new_value;
3180 {
3181 RF_ComponentLabel_t clabel;
3182 struct vnode *vp;
3183 dev_t dev;
3184 int row, column;
3185 int sparecol;
3186
3187 raidPtr->root_partition = new_value;
3188 for(row=0; row<raidPtr->numRow; row++) {
3189 for(column=0; column<raidPtr->numCol; column++) {
3190 if (raidPtr->Disks[row][column].status ==
3191 rf_ds_optimal) {
3192 dev = raidPtr->Disks[row][column].dev;
3193 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3194 raidread_component_label(dev, vp, &clabel);
3195 clabel.root_partition = new_value;
3196 raidwrite_component_label(dev, vp, &clabel);
3197 }
3198 }
3199 }
3200 for(column = 0; column < raidPtr->numSpare ; column++) {
3201 sparecol = raidPtr->numCol + column;
3202 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
3203 dev = raidPtr->Disks[0][sparecol].dev;
3204 vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
3205 raidread_component_label(dev, vp, &clabel);
3206 clabel.root_partition = new_value;
3207 raidwrite_component_label(dev, vp, &clabel);
3208 }
3209 }
3210 return(new_value);
3211 }
3212
3213 void
3214 rf_release_all_vps(cset)
3215 RF_ConfigSet_t *cset;
3216 {
3217 RF_AutoConfig_t *ac;
3218
3219 ac = cset->ac;
3220 while(ac!=NULL) {
3221 /* Close the vp, and give it back */
3222 if (ac->vp) {
3223 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3224 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3225 vput(ac->vp);
3226 ac->vp = NULL;
3227 }
3228 ac = ac->next;
3229 }
3230 }
3231
3232
3233 void
3234 rf_cleanup_config_set(cset)
3235 RF_ConfigSet_t *cset;
3236 {
3237 RF_AutoConfig_t *ac;
3238 RF_AutoConfig_t *next_ac;
3239
3240 ac = cset->ac;
3241 while(ac!=NULL) {
3242 next_ac = ac->next;
3243 /* nuke the label */
3244 free(ac->clabel, M_RAIDFRAME);
3245 /* cleanup the config structure */
3246 free(ac, M_RAIDFRAME);
3247 /* "next.." */
3248 ac = next_ac;
3249 }
3250 /* and, finally, nuke the config set */
3251 free(cset, M_RAIDFRAME);
3252 }
3253
3254
3255 void
3256 raid_init_component_label(raidPtr, clabel)
3257 RF_Raid_t *raidPtr;
3258 RF_ComponentLabel_t *clabel;
3259 {
3260 /* current version number */
3261 clabel->version = RF_COMPONENT_LABEL_VERSION;
3262 clabel->serial_number = raidPtr->serial_number;
3263 clabel->mod_counter = raidPtr->mod_counter;
3264 clabel->num_rows = raidPtr->numRow;
3265 clabel->num_columns = raidPtr->numCol;
3266 clabel->clean = RF_RAID_DIRTY; /* not clean */
3267 clabel->status = rf_ds_optimal; /* "It's good!" */
3268
3269 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3270 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3271 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3272
3273 clabel->blockSize = raidPtr->bytesPerSector;
3274 clabel->numBlocks = raidPtr->sectorsPerDisk;
3275
3276 /* XXX not portable */
3277 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3278 clabel->maxOutstanding = raidPtr->maxOutstanding;
3279 clabel->autoconfigure = raidPtr->autoconfigure;
3280 clabel->root_partition = raidPtr->root_partition;
3281 clabel->last_unit = raidPtr->raidid;
3282 clabel->config_order = raidPtr->config_order;
3283 }
3284
3285 int
3286 rf_auto_config_set(cset,unit)
3287 RF_ConfigSet_t *cset;
3288 int *unit;
3289 {
3290 RF_Raid_t *raidPtr;
3291 RF_Config_t *config;
3292 int raidID;
3293 int retcode;
3294
3295 #if DEBUG
3296 printf("RAID autoconfigure\n");
3297 #endif
3298
3299 retcode = 0;
3300 *unit = -1;
3301
3302 /* 1. Create a config structure */
3303
3304 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3305 M_RAIDFRAME,
3306 M_NOWAIT);
3307 if (config==NULL) {
3308 printf("Out of mem!?!?\n");
3309 /* XXX do something more intelligent here. */
3310 return(1);
3311 }
3312
3313 memset(config, 0, sizeof(RF_Config_t));
3314
3315 /*
3316 	   2. Figure out what RAID ID this one is supposed to live at.
3317 	   See if we can get the same RAID dev that it was configured
3318 	   on last time.
3319 */
3320
3321 raidID = cset->ac->clabel->last_unit;
3322 if ((raidID < 0) || (raidID >= numraid)) {
3323 /* let's not wander off into lala land. */
3324 raidID = numraid - 1;
3325 }
3326 if (raidPtrs[raidID]->valid != 0) {
3327
3328 /*
3329 Nope... Go looking for an alternative...
3330 Start high so we don't immediately use raid0 if that's
3331 not taken.
3332 */
3333
3334 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3335 if (raidPtrs[raidID]->valid == 0) {
3336 /* can use this one! */
3337 break;
3338 }
3339 }
3340 }
3341
3342 if (raidID < 0) {
3343 /* punt... */
3344 printf("Unable to auto configure this set!\n");
3345 printf("(Out of RAID devs!)\n");
3346 return(1);
3347 }
3348
3349 #if DEBUG
3350 printf("Configuring raid%d:\n",raidID);
3351 #endif
3352
3353 raidPtr = raidPtrs[raidID];
3354
3355 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3356 raidPtr->raidid = raidID;
3357 raidPtr->openings = RAIDOUTSTANDING;
3358
3359 /* 3. Build the configuration structure */
3360 rf_create_configuration(cset->ac, config, raidPtr);
3361
3362 /* 4. Do the configuration */
3363 retcode = rf_Configure(raidPtr, config, cset->ac);
3364
3365 if (retcode == 0) {
3366
3367 raidinit(raidPtrs[raidID]);
3368
3369 rf_markalldirty(raidPtrs[raidID]);
3370 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3371 if (cset->ac->clabel->root_partition==1) {
3372 /* everything configured just fine. Make a note
3373 that this set is eligible to be root. */
3374 cset->rootable = 1;
3375 /* XXX do this here? */
3376 raidPtrs[raidID]->root_partition = 1;
3377 }
3378 }
3379
3380 /* 5. Cleanup */
3381 free(config, M_RAIDFRAME);
3382
3383 *unit = raidID;
3384 return(retcode);
3385 }
3386
3387 void
3388 rf_disk_unbusy(desc)
3389 RF_RaidAccessDesc_t *desc;
3390 {
3391 struct buf *bp;
3392
3393 bp = (struct buf *)desc->bp;
3394 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3395 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3396 }
3397