/*	$NetBSD: rf_netbsdkintf.c,v 1.149 2002/11/19 01:45:29 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
 * rf_netbsdkintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.149 2002/11/19 01:45:29 oster Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 #ifdef DEBUG
157 int rf_kdebug_level = 0;
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit(RF_Raid_t *);
180
181 void raidattach(int);
182
183 dev_type_open(raidopen);
184 dev_type_close(raidclose);
185 dev_type_read(raidread);
186 dev_type_write(raidwrite);
187 dev_type_ioctl(raidioctl);
188 dev_type_strategy(raidstrategy);
189 dev_type_dump(raiddump);
190 dev_type_size(raidsize);
191
192 const struct bdevsw raid_bdevsw = {
193 raidopen, raidclose, raidstrategy, raidioctl,
194 raiddump, raidsize, D_DISK
195 };
196
197 const struct cdevsw raid_cdevsw = {
198 raidopen, raidclose, raidread, raidwrite, raidioctl,
199 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
200 };
201
202 /*
203 * Pilfered from ccd.c
204 */
205
206 struct raidbuf {
207 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
208 struct buf *rf_obp; /* ptr. to original I/O buf */
209 RF_DiskQueueData_t *req;/* the request that this was part of.. */
210 };
211
212 /* component buffer pool */
213 struct pool raidframe_cbufpool;
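/* raidbufs are allocated from this pool in rf_DispatchKernelIO() (one per
 * component I/O) and returned to it in KernelWakeupFunc() once the
 * underlying transfer has completed. */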
214
215 /* XXX Not sure if the following should be replacing the raidPtrs above,
216 or if it should be used in conjunction with that...
217 */
218
219 struct raid_softc {
220 int sc_flags; /* flags */
221 int sc_cflags; /* configuration flags */
222 size_t sc_size; /* size of the raid device */
223 char sc_xname[20]; /* XXX external name */
224 struct disk sc_dkdev; /* generic disk device info */
225 struct bufq_state buf_queue; /* used for the device queue */
226 };
227 /* sc_flags */
228 #define RAIDF_INITED 0x01 /* unit has been initialized */
229 #define RAIDF_WLABEL 0x02 /* label area is writable */
230 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
231 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
232 #define RAIDF_LOCKED 0x80 /* unit is locked */
233
234 #define raidunit(x) DISKUNIT(x)
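/* raidunit() extracts the RAID unit number from a dev_t; it is used below
 * to index both raid_softc[] and raidPtrs[]. */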
235 int numraid = 0;
236
237 /*
238 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
239 * Be aware that large numbers can allow the driver to consume a lot of
240 * kernel memory, especially on writes, and in degraded mode reads.
241 *
242 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
243 * a single 64K write will typically require 64K for the old data,
244 * 64K for the old parity, and 64K for the new parity, for a total
245 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
247 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
248 *
249 * Now in degraded mode, for example, a 64K read on the above setup may
250 * require data reconstruction, which will require *all* of the 4 remaining
251 * disks to participate -- 4 * 32K/disk == 128K again.
252 */
253
254 #ifndef RAIDOUTSTANDING
255 #define RAIDOUTSTANDING 6
256 #endif
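
/* raidPtr->openings is initialized to RAIDOUTSTANDING when a set is
 * configured (see RAIDFRAME_CONFIGURE below) and is decremented for each
 * access dispatched in raidstart(). */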
257
258 #define RAIDLABELDEV(dev) \
259 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
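/* RAIDLABELDEV(dev) maps any partition of a RAID unit to the raw partition
 * of that same unit; writedisklabel() is handed this device below so that
 * disklabel updates always go through the raw partition. */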
260
261 /* declared here, and made public, for the benefit of KVM stuff.. */
262 struct raid_softc *raid_softc;
263
264 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
265 struct disklabel *);
266 static void raidgetdisklabel(dev_t);
267 static void raidmakedisklabel(struct raid_softc *);
268
269 static int raidlock(struct raid_softc *);
270 static void raidunlock(struct raid_softc *);
271
272 static void rf_markalldirty(RF_Raid_t *);
273
274 struct device *raidrootdev;
275
276 void rf_ReconThread(struct rf_recon_req *);
277 /* XXX what I want is: */
278 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
280 void rf_CopybackThread(RF_Raid_t *raidPtr);
281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
282 int rf_autoconfig(struct device *self);
283 void rf_buildroothack(RF_ConfigSet_t *);
284
285 RF_AutoConfig_t *rf_find_raid_components(void);
286 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
287 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
288 static int rf_reasonable_label(RF_ComponentLabel_t *);
289 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
290 int rf_set_autoconfig(RF_Raid_t *, int);
291 int rf_set_rootpartition(RF_Raid_t *, int);
292 void rf_release_all_vps(RF_ConfigSet_t *);
293 void rf_cleanup_config_set(RF_ConfigSet_t *);
294 int rf_have_enough_components(RF_ConfigSet_t *);
295 int rf_auto_config_set(RF_ConfigSet_t *, int *);
296
297 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
298 allow autoconfig to take place.
299 Note that this is overridden by having
300 RAID_AUTOCONFIG as an option in the
301 kernel config file. */
302
303 void
304 raidattach(num)
305 int num;
306 {
307 int raidID;
308 int i, rc;
309
310 #ifdef DEBUG
311 printf("raidattach: Asked for %d units\n", num);
312 #endif
313
314 if (num <= 0) {
315 #ifdef DIAGNOSTIC
316 panic("raidattach: count <= 0");
317 #endif
318 return;
319 }
320 /* This is where all the initialization stuff gets done. */
321
322 numraid = num;
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!");
329 }
330
331 /* Initialize the component buffer pool. */
332 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
333 0, 0, "raidpl", NULL);
334
335 rc = rf_mutex_init(&rf_sparet_wait_mutex);
336 if (rc) {
337 RF_PANIC();
338 }
339
340 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
341
342 for (i = 0; i < num; i++)
343 raidPtrs[i] = NULL;
344 rc = rf_BootRaidframe();
345 if (rc == 0)
346 printf("Kernelized RAIDframe activated\n");
347 else
348 panic("Serious error booting RAID!!");
349
350 /* put together some datastructures like the CCD device does.. This
351 * lets us lock the device and what-not when it gets opened. */
352
353 raid_softc = (struct raid_softc *)
354 malloc(num * sizeof(struct raid_softc),
355 M_RAIDFRAME, M_NOWAIT);
356 if (raid_softc == NULL) {
357 printf("WARNING: no memory for RAIDframe driver\n");
358 return;
359 }
360
361 memset(raid_softc, 0, num * sizeof(struct raid_softc));
362
363 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
364 M_RAIDFRAME, M_NOWAIT);
365 if (raidrootdev == NULL) {
366 panic("No memory for RAIDframe driver!!?!?!");
367 }
368
369 for (raidID = 0; raidID < num; raidID++) {
370 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
371
372 raidrootdev[raidID].dv_class = DV_DISK;
373 raidrootdev[raidID].dv_cfdata = NULL;
374 raidrootdev[raidID].dv_unit = raidID;
375 raidrootdev[raidID].dv_parent = NULL;
376 raidrootdev[raidID].dv_flags = 0;
377 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
378
379 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
380 (RF_Raid_t *));
381 if (raidPtrs[raidID] == NULL) {
382 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
383 numraid = raidID;
384 return;
385 }
386 }
387
388 #ifdef RAID_AUTOCONFIG
389 raidautoconfig = 1;
390 #endif
391
392 /*
393 * Register a finalizer which will be used to auto-config RAID
394 * sets once all real hardware devices have been found.
395 */
396 if (config_finalize_register(NULL, rf_autoconfig) != 0)
397 printf("WARNING: unable to register RAIDframe finalizer\n");
398 }
399
400 int
401 rf_autoconfig(struct device *self)
402 {
403 RF_AutoConfig_t *ac_list;
404 RF_ConfigSet_t *config_sets;
405
406 if (raidautoconfig == 0)
407 return (0);
408
409 /* XXX This code can only be run once. */
410 raidautoconfig = 0;
411
412 /* 1. locate all RAID components on the system */
413 #ifdef DEBUG
414 printf("Searching for RAID components...\n");
415 #endif
416 ac_list = rf_find_raid_components();
417
418 /* 2. Sort them into their respective sets. */
419 config_sets = rf_create_auto_sets(ac_list);
420
421 /*
	 * 3. Evaluate each set and configure the valid ones.
423 * This gets done in rf_buildroothack().
424 */
425 rf_buildroothack(config_sets);
426
427 return (1);
428 }
429
430 void
431 rf_buildroothack(RF_ConfigSet_t *config_sets)
432 {
433 RF_ConfigSet_t *cset;
434 RF_ConfigSet_t *next_cset;
435 int retcode;
436 int raidID;
437 int rootID;
438 int num_root;
439
440 rootID = 0;
441 num_root = 0;
442 cset = config_sets;
443 while(cset != NULL ) {
444 next_cset = cset->next;
445 if (rf_have_enough_components(cset) &&
446 cset->ac->clabel->autoconfigure==1) {
447 retcode = rf_auto_config_set(cset,&raidID);
448 if (!retcode) {
449 if (cset->rootable) {
450 rootID = raidID;
451 num_root++;
452 }
453 } else {
454 /* The autoconfig didn't work :( */
455 #if DEBUG
456 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
457 #endif
458 rf_release_all_vps(cset);
459 }
460 } else {
461 /* we're not autoconfiguring this set...
462 release the associated resources */
463 rf_release_all_vps(cset);
464 }
465 /* cleanup */
466 rf_cleanup_config_set(cset);
467 cset = next_cset;
468 }
469
470 /* we found something bootable... */
471
472 if (num_root == 1) {
473 booted_device = &raidrootdev[rootID];
474 } else if (num_root > 1) {
475 /* we can't guess.. require the user to answer... */
476 boothowto |= RB_ASKNAME;
477 }
478 }
479
480
481 int
482 raidsize(dev)
483 dev_t dev;
484 {
485 struct raid_softc *rs;
486 struct disklabel *lp;
487 int part, unit, omask, size;
488
489 unit = raidunit(dev);
490 if (unit >= numraid)
491 return (-1);
492 rs = &raid_softc[unit];
493
494 if ((rs->sc_flags & RAIDF_INITED) == 0)
495 return (-1);
496
497 part = DISKPART(dev);
498 omask = rs->sc_dkdev.dk_openmask & (1 << part);
499 lp = rs->sc_dkdev.dk_label;
500
501 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
502 return (-1);
503
504 if (lp->d_partitions[part].p_fstype != FS_SWAP)
505 size = -1;
506 else
507 size = lp->d_partitions[part].p_size *
508 (lp->d_secsize / DEV_BSIZE);
509
510 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
511 return (-1);
512
513 return (size);
514
515 }
516
517 int
518 raiddump(dev, blkno, va, size)
519 dev_t dev;
520 daddr_t blkno;
521 caddr_t va;
522 size_t size;
523 {
524 /* Not implemented. */
525 return ENXIO;
526 }
527 /* ARGSUSED */
528 int
529 raidopen(dev, flags, fmt, p)
530 dev_t dev;
531 int flags, fmt;
532 struct proc *p;
533 {
534 int unit = raidunit(dev);
535 struct raid_softc *rs;
536 struct disklabel *lp;
537 int part, pmask;
538 int error = 0;
539
540 if (unit >= numraid)
541 return (ENXIO);
542 rs = &raid_softc[unit];
543
544 if ((error = raidlock(rs)) != 0)
545 return (error);
546 lp = rs->sc_dkdev.dk_label;
547
548 part = DISKPART(dev);
549 pmask = (1 << part);
550
551 if ((rs->sc_flags & RAIDF_INITED) &&
552 (rs->sc_dkdev.dk_openmask == 0))
553 raidgetdisklabel(dev);
554
555 /* make sure that this partition exists */
556
557 if (part != RAW_PART) {
558 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
559 ((part >= lp->d_npartitions) ||
560 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
561 error = ENXIO;
562 raidunlock(rs);
563 return (error);
564 }
565 }
566 /* Prevent this unit from being unconfigured while open. */
567 switch (fmt) {
568 case S_IFCHR:
569 rs->sc_dkdev.dk_copenmask |= pmask;
570 break;
571
572 case S_IFBLK:
573 rs->sc_dkdev.dk_bopenmask |= pmask;
574 break;
575 }
576
577 if ((rs->sc_dkdev.dk_openmask == 0) &&
578 ((rs->sc_flags & RAIDF_INITED) != 0)) {
579 /* First one... mark things as dirty... Note that we *MUST*
580 have done a configure before this. I DO NOT WANT TO BE
581 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
582 THAT THEY BELONG TOGETHER!!!!! */
583 /* XXX should check to see if we're only open for reading
584 here... If so, we needn't do this, but then need some
585 other way of keeping track of what's happened.. */
586
587 rf_markalldirty( raidPtrs[unit] );
588 }
589
590
591 rs->sc_dkdev.dk_openmask =
592 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
593
594 raidunlock(rs);
595
596 return (error);
597
598
599 }
600 /* ARGSUSED */
601 int
602 raidclose(dev, flags, fmt, p)
603 dev_t dev;
604 int flags, fmt;
605 struct proc *p;
606 {
607 int unit = raidunit(dev);
608 struct raid_softc *rs;
609 int error = 0;
610 int part;
611
612 if (unit >= numraid)
613 return (ENXIO);
614 rs = &raid_softc[unit];
615
616 if ((error = raidlock(rs)) != 0)
617 return (error);
618
619 part = DISKPART(dev);
620
621 /* ...that much closer to allowing unconfiguration... */
622 switch (fmt) {
623 case S_IFCHR:
624 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
625 break;
626
627 case S_IFBLK:
628 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
629 break;
630 }
631 rs->sc_dkdev.dk_openmask =
632 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
633
634 if ((rs->sc_dkdev.dk_openmask == 0) &&
635 ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Mark things as clean. (If RAIDF_INITED were not set,
		   device shutdown would already have taken care of
		   setting the clean bits.) */
640
641 rf_update_component_labels(raidPtrs[unit],
642 RF_FINAL_COMPONENT_UPDATE);
643 if (doing_shutdown) {
644 /* last one, and we're going down, so
645 lights out for this RAID set too. */
646 error = rf_Shutdown(raidPtrs[unit]);
647
648 /* It's no longer initialized... */
649 rs->sc_flags &= ~RAIDF_INITED;
650
651 /* Detach the disk. */
652 disk_detach(&rs->sc_dkdev);
653 }
654 }
655
656 raidunlock(rs);
657 return (0);
658
659 }
660
661 void
662 raidstrategy(bp)
663 struct buf *bp;
664 {
665 int s;
666
667 unsigned int raidID = raidunit(bp->b_dev);
668 RF_Raid_t *raidPtr;
	struct raid_softc *rs;
	struct disklabel *lp;
	int wlabel;

	/* validate the unit number before touching raid_softc[] or
	 * raidPtrs[] */
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	rs = &raid_softc[raidID];
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	raidPtr = raidPtrs[raidID];
688 if (!raidPtr->valid) {
689 bp->b_error = ENODEV;
690 bp->b_flags |= B_ERROR;
691 bp->b_resid = bp->b_bcount;
692 biodone(bp);
693 return;
694 }
695 if (bp->b_bcount == 0) {
696 db1_printf(("b_bcount is zero..\n"));
697 biodone(bp);
698 return;
699 }
700 lp = rs->sc_dkdev.dk_label;
701
702 /*
703 * Do bounds checking and adjust transfer. If there's an
704 * error, the bounds check will flag that for us.
705 */
706
707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
708 if (DISKPART(bp->b_dev) != RAW_PART)
709 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
710 db1_printf(("Bounds check failed!!:%d %d\n",
711 (int) bp->b_blkno, (int) wlabel));
712 biodone(bp);
713 return;
714 }
715 s = splbio();
716
717 bp->b_resid = 0;
718
719 /* stuff it onto our queue */
720 BUFQ_PUT(&rs->buf_queue, bp);
721
722 raidstart(raidPtrs[raidID]);
723
724 splx(s);
725 }
726 /* ARGSUSED */
727 int
728 raidread(dev, uio, flags)
729 dev_t dev;
730 struct uio *uio;
731 int flags;
732 {
733 int unit = raidunit(dev);
734 struct raid_softc *rs;
735 int part;
736
737 if (unit >= numraid)
738 return (ENXIO);
739 rs = &raid_softc[unit];
740
741 if ((rs->sc_flags & RAIDF_INITED) == 0)
742 return (ENXIO);
743 part = DISKPART(dev);
744
745 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
746
747 }
748 /* ARGSUSED */
749 int
750 raidwrite(dev, uio, flags)
751 dev_t dev;
752 struct uio *uio;
753 int flags;
754 {
755 int unit = raidunit(dev);
756 struct raid_softc *rs;
757
758 if (unit >= numraid)
759 return (ENXIO);
760 rs = &raid_softc[unit];
761
762 if ((rs->sc_flags & RAIDF_INITED) == 0)
763 return (ENXIO);
764
765 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
766
767 }
768
769 int
770 raidioctl(dev, cmd, data, flag, p)
771 dev_t dev;
772 u_long cmd;
773 caddr_t data;
774 int flag;
775 struct proc *p;
776 {
777 int unit = raidunit(dev);
778 int error = 0;
779 int part, pmask;
780 struct raid_softc *rs;
781 RF_Config_t *k_cfg, *u_cfg;
782 RF_Raid_t *raidPtr;
783 RF_RaidDisk_t *diskPtr;
784 RF_AccTotals_t *totals;
785 RF_DeviceConfig_t *d_cfg, **ucfgp;
786 u_char *specific_buf;
787 int retcode = 0;
788 int row;
789 int column;
790 int raidid;
791 struct rf_recon_req *rrcopy, *rr;
792 RF_ComponentLabel_t *clabel;
793 RF_ComponentLabel_t ci_label;
794 RF_ComponentLabel_t **clabel_ptr;
795 RF_SingleComponent_t *sparePtr,*componentPtr;
796 RF_SingleComponent_t hot_spare;
797 RF_SingleComponent_t component;
798 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
799 int i, j, d;
800 #ifdef __HAVE_OLD_DISKLABEL
801 struct disklabel newlabel;
802 #endif
803
804 if (unit >= numraid)
805 return (ENXIO);
806 rs = &raid_softc[unit];
807 raidPtr = raidPtrs[unit];
808
809 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
810 (int) DISKPART(dev), (int) unit, (int) cmd));
811
812 /* Must be open for writes for these commands... */
813 switch (cmd) {
814 case DIOCSDINFO:
815 case DIOCWDINFO:
816 #ifdef __HAVE_OLD_DISKLABEL
817 case ODIOCWDINFO:
818 case ODIOCSDINFO:
819 #endif
820 case DIOCWLABEL:
821 if ((flag & FWRITE) == 0)
822 return (EBADF);
823 }
824
825 /* Must be initialized for these... */
826 switch (cmd) {
827 case DIOCGDINFO:
828 case DIOCSDINFO:
829 case DIOCWDINFO:
830 #ifdef __HAVE_OLD_DISKLABEL
831 case ODIOCGDINFO:
832 case ODIOCWDINFO:
833 case ODIOCSDINFO:
834 case ODIOCGDEFLABEL:
835 #endif
836 case DIOCGPART:
837 case DIOCWLABEL:
838 case DIOCGDEFLABEL:
839 case RAIDFRAME_SHUTDOWN:
840 case RAIDFRAME_REWRITEPARITY:
841 case RAIDFRAME_GET_INFO:
842 case RAIDFRAME_RESET_ACCTOTALS:
843 case RAIDFRAME_GET_ACCTOTALS:
844 case RAIDFRAME_KEEP_ACCTOTALS:
845 case RAIDFRAME_GET_SIZE:
846 case RAIDFRAME_FAIL_DISK:
847 case RAIDFRAME_COPYBACK:
848 case RAIDFRAME_CHECK_RECON_STATUS:
849 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
850 case RAIDFRAME_GET_COMPONENT_LABEL:
851 case RAIDFRAME_SET_COMPONENT_LABEL:
852 case RAIDFRAME_ADD_HOT_SPARE:
853 case RAIDFRAME_REMOVE_HOT_SPARE:
854 case RAIDFRAME_INIT_LABELS:
855 case RAIDFRAME_REBUILD_IN_PLACE:
856 case RAIDFRAME_CHECK_PARITY:
857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
859 case RAIDFRAME_CHECK_COPYBACK_STATUS:
860 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
861 case RAIDFRAME_SET_AUTOCONFIG:
862 case RAIDFRAME_SET_ROOT:
863 case RAIDFRAME_DELETE_COMPONENT:
864 case RAIDFRAME_INCORPORATE_HOT_SPARE:
865 if ((rs->sc_flags & RAIDF_INITED) == 0)
866 return (ENXIO);
867 }
868
869 switch (cmd) {
870
871 /* configure the system */
872 case RAIDFRAME_CONFIGURE:
873
874 if (raidPtr->valid) {
875 /* There is a valid RAID set running on this unit! */
876 printf("raid%d: Device already configured!\n",unit);
877 return(EINVAL);
878 }
879
880 /* copy-in the configuration information */
881 /* data points to a pointer to the configuration structure */
882
883 u_cfg = *((RF_Config_t **) data);
884 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
885 if (k_cfg == NULL) {
886 return (ENOMEM);
887 }
888 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
889 sizeof(RF_Config_t));
890 if (retcode) {
891 RF_Free(k_cfg, sizeof(RF_Config_t));
892 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
893 retcode));
894 return (retcode);
895 }
896 /* allocate a buffer for the layout-specific data, and copy it
897 * in */
898 if (k_cfg->layoutSpecificSize) {
899 if (k_cfg->layoutSpecificSize > 10000) {
900 /* sanity check */
901 RF_Free(k_cfg, sizeof(RF_Config_t));
902 return (EINVAL);
903 }
904 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
905 (u_char *));
906 if (specific_buf == NULL) {
907 RF_Free(k_cfg, sizeof(RF_Config_t));
908 return (ENOMEM);
909 }
910 retcode = copyin(k_cfg->layoutSpecific,
911 (caddr_t) specific_buf,
912 k_cfg->layoutSpecificSize);
913 if (retcode) {
914 RF_Free(k_cfg, sizeof(RF_Config_t));
915 RF_Free(specific_buf,
916 k_cfg->layoutSpecificSize);
917 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
918 retcode));
919 return (retcode);
920 }
921 } else
922 specific_buf = NULL;
923 k_cfg->layoutSpecific = specific_buf;
924
925 /* should do some kind of sanity check on the configuration.
926 * Store the sum of all the bytes in the last byte? */
927
928 /* configure the system */
929
930 /*
931 * Clear the entire RAID descriptor, just to make sure
932 * there is no stale data left in the case of a
933 * reconfiguration
934 */
935 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
936 raidPtr->raidid = unit;
937
938 retcode = rf_Configure(raidPtr, k_cfg, NULL);
939
940 if (retcode == 0) {
941
942 /* allow this many simultaneous IO's to
943 this RAID device */
944 raidPtr->openings = RAIDOUTSTANDING;
945
946 raidinit(raidPtr);
947 rf_markalldirty(raidPtr);
948 }
949 /* free the buffers. No return code here. */
950 if (k_cfg->layoutSpecificSize) {
951 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
952 }
953 RF_Free(k_cfg, sizeof(RF_Config_t));
954
955 return (retcode);
956
957 /* shutdown the system */
958 case RAIDFRAME_SHUTDOWN:
959
960 if ((error = raidlock(rs)) != 0)
961 return (error);
962
963 /*
964 * If somebody has a partition mounted, we shouldn't
965 * shutdown.
966 */
967
968 part = DISKPART(dev);
969 pmask = (1 << part);
970 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
971 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
972 (rs->sc_dkdev.dk_copenmask & pmask))) {
973 raidunlock(rs);
974 return (EBUSY);
975 }
976
977 retcode = rf_Shutdown(raidPtr);
978
979 /* It's no longer initialized... */
980 rs->sc_flags &= ~RAIDF_INITED;
981
982 /* Detach the disk. */
983 disk_detach(&rs->sc_dkdev);
984
985 raidunlock(rs);
986
987 return (retcode);
988 case RAIDFRAME_GET_COMPONENT_LABEL:
989 clabel_ptr = (RF_ComponentLabel_t **) data;
990 /* need to read the component label for the disk indicated
991 by row,column in clabel */
992
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
996 (RF_ComponentLabel_t *));
997 if (clabel == NULL)
998 return (ENOMEM);
999
1000 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 row = clabel->row;
1011 column = clabel->column;
1012
1013 if ((row < 0) || (row >= raidPtr->numRow) ||
1014 (column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 raidread_component_label(raidPtr->Disks[row][column].dev,
1021 raidPtr->raid_cinfo[row][column].ci_vp,
1022 clabel );
1023
1024 retcode = copyout((caddr_t) clabel,
1025 (caddr_t) *clabel_ptr,
1026 sizeof(RF_ComponentLabel_t));
1027 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1028 return (retcode);
1029
1030 case RAIDFRAME_SET_COMPONENT_LABEL:
1031 clabel = (RF_ComponentLabel_t *) data;
1032
1033 /* XXX check the label for valid stuff... */
1034 /* Note that some things *should not* get modified --
1035 the user should be re-initing the labels instead of
1036 trying to patch things.
1037 */
1038
1039 raidid = raidPtr->raidid;
1040 printf("raid%d: Got component label:\n", raidid);
1041 printf("raid%d: Version: %d\n", raidid, clabel->version);
1042 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1043 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1044 printf("raid%d: Row: %d\n", raidid, clabel->row);
1045 printf("raid%d: Column: %d\n", raidid, clabel->column);
1046 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1047 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1048 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1049 printf("raid%d: Status: %d\n", raidid, clabel->status);
1050
1051 row = clabel->row;
1052 column = clabel->column;
1053
1054 if ((row < 0) || (row >= raidPtr->numRow) ||
1055 (column < 0) || (column >= raidPtr->numCol)) {
1056 return(EINVAL);
1057 }
1058
1059 /* XXX this isn't allowed to do anything for now :-) */
1060
1061 /* XXX and before it is, we need to fill in the rest
1062 of the fields!?!?!?! */
1063 #if 0
1064 raidwrite_component_label(
1065 raidPtr->Disks[row][column].dev,
1066 raidPtr->raid_cinfo[row][column].ci_vp,
1067 clabel );
1068 #endif
1069 return (0);
1070
1071 case RAIDFRAME_INIT_LABELS:
1072 clabel = (RF_ComponentLabel_t *) data;
1073 /*
1074 we only want the serial number from
1075 the above. We get all the rest of the information
1076 from the config that was used to create this RAID
1077 set.
1078 */
1079
1080 raidPtr->serial_number = clabel->serial_number;
1081
1082 raid_init_component_label(raidPtr, &ci_label);
1083 ci_label.serial_number = clabel->serial_number;
1084
1085 for(row=0;row<raidPtr->numRow;row++) {
1086 ci_label.row = row;
1087 for(column=0;column<raidPtr->numCol;column++) {
1088 diskPtr = &raidPtr->Disks[row][column];
1089 if (!RF_DEAD_DISK(diskPtr->status)) {
1090 ci_label.partitionSize = diskPtr->partitionSize;
1091 ci_label.column = column;
1092 raidwrite_component_label(
1093 raidPtr->Disks[row][column].dev,
1094 raidPtr->raid_cinfo[row][column].ci_vp,
1095 &ci_label );
1096 }
1097 }
1098 }
1099
1100 return (retcode);
1101 case RAIDFRAME_SET_AUTOCONFIG:
1102 d = rf_set_autoconfig(raidPtr, *(int *) data);
1103 printf("raid%d: New autoconfig value is: %d\n",
1104 raidPtr->raidid, d);
1105 *(int *) data = d;
1106 return (retcode);
1107
1108 case RAIDFRAME_SET_ROOT:
1109 d = rf_set_rootpartition(raidPtr, *(int *) data);
1110 printf("raid%d: New rootpartition value is: %d\n",
1111 raidPtr->raidid, d);
1112 *(int *) data = d;
1113 return (retcode);
1114
1115 /* initialize all parity */
1116 case RAIDFRAME_REWRITEPARITY:
1117
1118 if (raidPtr->Layout.map->faultsTolerated == 0) {
1119 /* Parity for RAID 0 is trivially correct */
1120 raidPtr->parity_good = RF_RAID_CLEAN;
1121 return(0);
1122 }
1123
1124 if (raidPtr->parity_rewrite_in_progress == 1) {
1125 /* Re-write is already in progress! */
1126 return(EINVAL);
1127 }
1128
1129 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1130 rf_RewriteParityThread,
1131 raidPtr,"raid_parity");
1132 return (retcode);
1133
1134
1135 case RAIDFRAME_ADD_HOT_SPARE:
1136 sparePtr = (RF_SingleComponent_t *) data;
1137 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1138 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1139 return(retcode);
1140
1141 case RAIDFRAME_REMOVE_HOT_SPARE:
1142 return(retcode);
1143
1144 case RAIDFRAME_DELETE_COMPONENT:
1145 componentPtr = (RF_SingleComponent_t *)data;
1146 memcpy( &component, componentPtr,
1147 sizeof(RF_SingleComponent_t));
1148 retcode = rf_delete_component(raidPtr, &component);
1149 return(retcode);
1150
1151 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1152 componentPtr = (RF_SingleComponent_t *)data;
1153 memcpy( &component, componentPtr,
1154 sizeof(RF_SingleComponent_t));
1155 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1156 return(retcode);
1157
1158 case RAIDFRAME_REBUILD_IN_PLACE:
1159
1160 if (raidPtr->Layout.map->faultsTolerated == 0) {
1161 /* Can't do this on a RAID 0!! */
1162 return(EINVAL);
1163 }
1164
1165 if (raidPtr->recon_in_progress == 1) {
1166 /* a reconstruct is already in progress! */
1167 return(EINVAL);
1168 }
1169
1170 componentPtr = (RF_SingleComponent_t *) data;
1171 memcpy( &component, componentPtr,
1172 sizeof(RF_SingleComponent_t));
1173 row = component.row;
1174 column = component.column;
1175
1176 if ((row < 0) || (row >= raidPtr->numRow) ||
1177 (column < 0) || (column >= raidPtr->numCol)) {
1178 return(EINVAL);
1179 }
1180
1181 RF_LOCK_MUTEX(raidPtr->mutex);
1182 if ((raidPtr->Disks[row][column].status == rf_ds_optimal) &&
1183 (raidPtr->numFailures > 0)) {
1184 /* XXX 0 above shouldn't be constant!!! */
1185 /* some component other than this has failed.
1186 Let's not make things worse than they already
1187 are... */
1188 printf("raid%d: Unable to reconstruct to disk at:\n",
1189 raidPtr->raidid);
1190 printf("raid%d: Row: %d Col: %d Too many failures.\n",
1191 raidPtr->raidid, row, column);
1192 RF_UNLOCK_MUTEX(raidPtr->mutex);
1193 return (EINVAL);
1194 }
1195 if (raidPtr->Disks[row][column].status ==
1196 rf_ds_reconstructing) {
1197 printf("raid%d: Unable to reconstruct to disk at:\n",
1198 raidPtr->raidid);
		printf("raid%d: Row: %d Col: %d Reconstruction already occurring!\n", raidPtr->raidid, row, column);
1200
1201 RF_UNLOCK_MUTEX(raidPtr->mutex);
1202 return (EINVAL);
1203 }
1204 if (raidPtr->Disks[row][column].status == rf_ds_spared) {
1205 RF_UNLOCK_MUTEX(raidPtr->mutex);
1206 return (EINVAL);
1207 }
1208 RF_UNLOCK_MUTEX(raidPtr->mutex);
1209
1210 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1211 if (rrcopy == NULL)
1212 return(ENOMEM);
1213
1214 rrcopy->raidPtr = (void *) raidPtr;
1215 rrcopy->row = row;
1216 rrcopy->col = column;
1217
1218 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1219 rf_ReconstructInPlaceThread,
1220 rrcopy,"raid_reconip");
1221 return(retcode);
1222
1223 case RAIDFRAME_GET_INFO:
1224 if (!raidPtr->valid)
1225 return (ENODEV);
1226 ucfgp = (RF_DeviceConfig_t **) data;
1227 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1228 (RF_DeviceConfig_t *));
1229 if (d_cfg == NULL)
1230 return (ENOMEM);
1231 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1232 d_cfg->rows = raidPtr->numRow;
1233 d_cfg->cols = raidPtr->numCol;
1234 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1235 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1236 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1237 return (ENOMEM);
1238 }
1239 d_cfg->nspares = raidPtr->numSpare;
1240 if (d_cfg->nspares >= RF_MAX_DISKS) {
1241 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1242 return (ENOMEM);
1243 }
1244 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1245 d = 0;
1246 for (i = 0; i < d_cfg->rows; i++) {
1247 for (j = 0; j < d_cfg->cols; j++) {
1248 d_cfg->devs[d] = raidPtr->Disks[i][j];
1249 d++;
1250 }
1251 }
1252 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1253 d_cfg->spares[i] = raidPtr->Disks[0][j];
1254 }
1255 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1256 sizeof(RF_DeviceConfig_t));
1257 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1258
1259 return (retcode);
1260
1261 case RAIDFRAME_CHECK_PARITY:
1262 *(int *) data = raidPtr->parity_good;
1263 return (0);
1264
1265 case RAIDFRAME_RESET_ACCTOTALS:
1266 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1267 return (0);
1268
1269 case RAIDFRAME_GET_ACCTOTALS:
1270 totals = (RF_AccTotals_t *) data;
1271 *totals = raidPtr->acc_totals;
1272 return (0);
1273
1274 case RAIDFRAME_KEEP_ACCTOTALS:
1275 raidPtr->keep_acc_totals = *(int *)data;
1276 return (0);
1277
1278 case RAIDFRAME_GET_SIZE:
1279 *(int *) data = raidPtr->totalSectors;
1280 return (0);
1281
1282 /* fail a disk & optionally start reconstruction */
1283 case RAIDFRAME_FAIL_DISK:
1284
1285 if (raidPtr->Layout.map->faultsTolerated == 0) {
1286 /* Can't do this on a RAID 0!! */
1287 return(EINVAL);
1288 }
1289
1290 rr = (struct rf_recon_req *) data;
1291
1292 if (rr->row < 0 || rr->row >= raidPtr->numRow
1293 || rr->col < 0 || rr->col >= raidPtr->numCol)
1294 return (EINVAL);
1295
1296
1297 RF_LOCK_MUTEX(raidPtr->mutex);
1298 if ((raidPtr->Disks[rr->row][rr->col].status ==
1299 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1300 /* some other component has failed. Let's not make
1301 things worse. XXX wrong for RAID6 */
1302 RF_UNLOCK_MUTEX(raidPtr->mutex);
1303 return (EINVAL);
1304 }
1305 if (raidPtr->Disks[rr->row][rr->col].status == rf_ds_spared) {
1306 /* Can't fail a spared disk! */
1307 RF_UNLOCK_MUTEX(raidPtr->mutex);
1308 return (EINVAL);
1309 }
1310 RF_UNLOCK_MUTEX(raidPtr->mutex);
1311
1312 /* make a copy of the recon request so that we don't rely on
1313 * the user's buffer */
1314 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1315 if (rrcopy == NULL)
1316 return(ENOMEM);
1317 memcpy(rrcopy, rr, sizeof(*rr));
1318 rrcopy->raidPtr = (void *) raidPtr;
1319
1320 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1321 rf_ReconThread,
1322 rrcopy,"raid_recon");
1323 return (0);
1324
1325 /* invoke a copyback operation after recon on whatever disk
1326 * needs it, if any */
1327 case RAIDFRAME_COPYBACK:
1328
1329 if (raidPtr->Layout.map->faultsTolerated == 0) {
1330 /* This makes no sense on a RAID 0!! */
1331 return(EINVAL);
1332 }
1333
1334 if (raidPtr->copyback_in_progress == 1) {
1335 /* Copyback is already in progress! */
1336 return(EINVAL);
1337 }
1338
1339 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1340 rf_CopybackThread,
1341 raidPtr,"raid_copyback");
1342 return (retcode);
1343
1344 /* return the percentage completion of reconstruction */
1345 case RAIDFRAME_CHECK_RECON_STATUS:
1346 if (raidPtr->Layout.map->faultsTolerated == 0) {
1347 /* This makes no sense on a RAID 0, so tell the
1348 user it's done. */
1349 *(int *) data = 100;
1350 return(0);
1351 }
1352 row = 0; /* XXX we only consider a single row... */
1353 if (raidPtr->status[row] != rf_rs_reconstructing)
1354 *(int *) data = 100;
1355 else
1356 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1357 return (0);
1358 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1359 progressInfoPtr = (RF_ProgressInfo_t **) data;
1360 row = 0; /* XXX we only consider a single row... */
1361 if (raidPtr->status[row] != rf_rs_reconstructing) {
1362 progressInfo.remaining = 0;
1363 progressInfo.completed = 100;
1364 progressInfo.total = 100;
1365 } else {
1366 progressInfo.total =
1367 raidPtr->reconControl[row]->numRUsTotal;
1368 progressInfo.completed =
1369 raidPtr->reconControl[row]->numRUsComplete;
1370 progressInfo.remaining = progressInfo.total -
1371 progressInfo.completed;
1372 }
1373 retcode = copyout((caddr_t) &progressInfo,
1374 (caddr_t) *progressInfoPtr,
1375 sizeof(RF_ProgressInfo_t));
1376 return (retcode);
1377
1378 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1379 if (raidPtr->Layout.map->faultsTolerated == 0) {
1380 /* This makes no sense on a RAID 0, so tell the
1381 user it's done. */
1382 *(int *) data = 100;
1383 return(0);
1384 }
1385 if (raidPtr->parity_rewrite_in_progress == 1) {
1386 *(int *) data = 100 *
1387 raidPtr->parity_rewrite_stripes_done /
1388 raidPtr->Layout.numStripe;
1389 } else {
1390 *(int *) data = 100;
1391 }
1392 return (0);
1393
1394 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1395 progressInfoPtr = (RF_ProgressInfo_t **) data;
1396 if (raidPtr->parity_rewrite_in_progress == 1) {
1397 progressInfo.total = raidPtr->Layout.numStripe;
1398 progressInfo.completed =
1399 raidPtr->parity_rewrite_stripes_done;
1400 progressInfo.remaining = progressInfo.total -
1401 progressInfo.completed;
1402 } else {
1403 progressInfo.remaining = 0;
1404 progressInfo.completed = 100;
1405 progressInfo.total = 100;
1406 }
1407 retcode = copyout((caddr_t) &progressInfo,
1408 (caddr_t) *progressInfoPtr,
1409 sizeof(RF_ProgressInfo_t));
1410 return (retcode);
1411
1412 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1413 if (raidPtr->Layout.map->faultsTolerated == 0) {
1414 /* This makes no sense on a RAID 0 */
1415 *(int *) data = 100;
1416 return(0);
1417 }
1418 if (raidPtr->copyback_in_progress == 1) {
1419 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1420 raidPtr->Layout.numStripe;
1421 } else {
1422 *(int *) data = 100;
1423 }
1424 return (0);
1425
1426 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1427 progressInfoPtr = (RF_ProgressInfo_t **) data;
1428 if (raidPtr->copyback_in_progress == 1) {
1429 progressInfo.total = raidPtr->Layout.numStripe;
1430 progressInfo.completed =
1431 raidPtr->copyback_stripes_done;
1432 progressInfo.remaining = progressInfo.total -
1433 progressInfo.completed;
1434 } else {
1435 progressInfo.remaining = 0;
1436 progressInfo.completed = 100;
1437 progressInfo.total = 100;
1438 }
1439 retcode = copyout((caddr_t) &progressInfo,
1440 (caddr_t) *progressInfoPtr,
1441 sizeof(RF_ProgressInfo_t));
1442 return (retcode);
1443
1444 /* the sparetable daemon calls this to wait for the kernel to
1445 * need a spare table. this ioctl does not return until a
1446 * spare table is needed. XXX -- calling mpsleep here in the
1447 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1448 * -- I should either compute the spare table in the kernel,
1449 * or have a different -- XXX XXX -- interface (a different
1450 * character device) for delivering the table -- XXX */
1451 #if 0
1452 case RAIDFRAME_SPARET_WAIT:
1453 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1454 while (!rf_sparet_wait_queue)
1455 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1456 waitreq = rf_sparet_wait_queue;
1457 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1458 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1459
1460 /* structure assignment */
1461 *((RF_SparetWait_t *) data) = *waitreq;
1462
1463 RF_Free(waitreq, sizeof(*waitreq));
1464 return (0);
1465
1466 /* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1468 case RAIDFRAME_ABORT_SPARET_WAIT:
1469 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1470 waitreq->fcol = -1;
1471 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1472 waitreq->next = rf_sparet_wait_queue;
1473 rf_sparet_wait_queue = waitreq;
1474 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1475 wakeup(&rf_sparet_wait_queue);
1476 return (0);
1477
1478 /* used by the spare table daemon to deliver a spare table
1479 * into the kernel */
1480 case RAIDFRAME_SEND_SPARET:
1481
1482 /* install the spare table */
1483 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1484
1485 /* respond to the requestor. the return status of the spare
1486 * table installation is passed in the "fcol" field */
1487 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1488 waitreq->fcol = retcode;
1489 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1490 waitreq->next = rf_sparet_resp_queue;
1491 rf_sparet_resp_queue = waitreq;
1492 wakeup(&rf_sparet_resp_queue);
1493 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1494
1495 return (retcode);
1496 #endif
1497
1498 default:
1499 break; /* fall through to the os-specific code below */
1500
1501 }
1502
1503 if (!raidPtr->valid)
1504 return (EINVAL);
1505
1506 /*
1507 * Add support for "regular" device ioctls here.
1508 */
1509
1510 switch (cmd) {
1511 case DIOCGDINFO:
1512 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1513 break;
1514 #ifdef __HAVE_OLD_DISKLABEL
1515 case ODIOCGDINFO:
1516 newlabel = *(rs->sc_dkdev.dk_label);
1517 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1518 return ENOTTY;
1519 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1520 break;
1521 #endif
1522
1523 case DIOCGPART:
1524 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1525 ((struct partinfo *) data)->part =
1526 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1527 break;
1528
1529 case DIOCWDINFO:
1530 case DIOCSDINFO:
1531 #ifdef __HAVE_OLD_DISKLABEL
1532 case ODIOCWDINFO:
1533 case ODIOCSDINFO:
1534 #endif
1535 {
1536 struct disklabel *lp;
1537 #ifdef __HAVE_OLD_DISKLABEL
1538 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1539 memset(&newlabel, 0, sizeof newlabel);
1540 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1541 lp = &newlabel;
1542 } else
1543 #endif
1544 lp = (struct disklabel *)data;
1545
1546 if ((error = raidlock(rs)) != 0)
1547 return (error);
1548
1549 rs->sc_flags |= RAIDF_LABELLING;
1550
1551 error = setdisklabel(rs->sc_dkdev.dk_label,
1552 lp, 0, rs->sc_dkdev.dk_cpulabel);
1553 if (error == 0) {
1554 if (cmd == DIOCWDINFO
1555 #ifdef __HAVE_OLD_DISKLABEL
1556 || cmd == ODIOCWDINFO
1557 #endif
1558 )
1559 error = writedisklabel(RAIDLABELDEV(dev),
1560 raidstrategy, rs->sc_dkdev.dk_label,
1561 rs->sc_dkdev.dk_cpulabel);
1562 }
1563 rs->sc_flags &= ~RAIDF_LABELLING;
1564
1565 raidunlock(rs);
1566
1567 if (error)
1568 return (error);
1569 break;
1570 }
1571
1572 case DIOCWLABEL:
1573 if (*(int *) data != 0)
1574 rs->sc_flags |= RAIDF_WLABEL;
1575 else
1576 rs->sc_flags &= ~RAIDF_WLABEL;
1577 break;
1578
1579 case DIOCGDEFLABEL:
1580 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1581 break;
1582
1583 #ifdef __HAVE_OLD_DISKLABEL
1584 case ODIOCGDEFLABEL:
1585 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1586 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1587 return ENOTTY;
1588 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1589 break;
1590 #endif
1591
1592 default:
1593 retcode = ENOTTY;
1594 }
1595 return (retcode);
1596
1597 }
1598
1599
1600 /* raidinit -- complete the rest of the initialization for the
1601 RAIDframe device. */
1602
1603
1604 static void
1605 raidinit(raidPtr)
1606 RF_Raid_t *raidPtr;
1607 {
1608 struct raid_softc *rs;
1609 int unit;
1610
1611 unit = raidPtr->raidid;
1612
1613 rs = &raid_softc[unit];
1614
1615 /* XXX should check return code first... */
1616 rs->sc_flags |= RAIDF_INITED;
1617
1618 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1619
1620 rs->sc_dkdev.dk_name = rs->sc_xname;
1621
1622 /* disk_attach actually creates space for the CPU disklabel, among
1623 * other things, so it's critical to call this *BEFORE* we try putzing
1624 * with disklabels. */
1625
1626 disk_attach(&rs->sc_dkdev);
1627
1628 /* XXX There may be a weird interaction here between this, and
1629 * protectedSectors, as used in RAIDframe. */
1630
1631 rs->sc_size = raidPtr->totalSectors;
1632
1633 }
1634
1635 /* wake up the daemon & tell it to get us a spare table
1636 * XXX
1637 * the entries in the queues should be tagged with the raidPtr
1638 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1640 * XXX
1641 *
1642 * XXX This code is not currently used. GO
1643 */
1644 int
1645 rf_GetSpareTableFromDaemon(req)
1646 RF_SparetWait_t *req;
1647 {
1648 int retcode;
1649
1650 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1651 req->next = rf_sparet_wait_queue;
1652 rf_sparet_wait_queue = req;
1653 wakeup(&rf_sparet_wait_queue);
1654
1655 /* mpsleep unlocks the mutex */
1656 while (!rf_sparet_resp_queue) {
1657 tsleep(&rf_sparet_resp_queue, PRIBIO,
1658 "raidframe getsparetable", 0);
1659 }
1660 req = rf_sparet_resp_queue;
1661 rf_sparet_resp_queue = req->next;
1662 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1663
1664 retcode = req->fcol;
1665 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1666 * alloc'd */
1667 return (retcode);
1668 }
1669
1670 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1671 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O;
1673 * do some extra sanity checking to return "appropriate" error values for
1674 * certain conditions (to make some standard utilities work)
1675 *
1676 * Formerly known as: rf_DoAccessKernel
1677 */
1678 void
1679 raidstart(raidPtr)
1680 RF_Raid_t *raidPtr;
1681 {
1682 RF_SectorCount_t num_blocks, pb, sum;
1683 RF_RaidAddr_t raid_addr;
1684 int retcode;
1685 struct partition *pp;
1686 daddr_t blocknum;
1687 int unit;
1688 struct raid_softc *rs;
1689 int do_async;
1690 struct buf *bp;
1691
1692 unit = raidPtr->raidid;
1693 rs = &raid_softc[unit];
1694
1695 /* quick check to see if anything has died recently */
1696 RF_LOCK_MUTEX(raidPtr->mutex);
1697 if (raidPtr->numNewFailures > 0) {
1698 rf_update_component_labels(raidPtr,
1699 RF_NORMAL_COMPONENT_UPDATE);
1700 raidPtr->numNewFailures--;
1701 }
1702
1703 /* Check to see if we're at the limit... */
1704 while (raidPtr->openings > 0) {
1705 RF_UNLOCK_MUTEX(raidPtr->mutex);
1706
1707 /* get the next item, if any, from the queue */
1708 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1709 /* nothing more to do */
1710 return;
1711 }
1712
1713 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1714 * partition.. Need to make it absolute to the underlying
1715 * device.. */
1716
1717 blocknum = bp->b_blkno;
1718 if (DISKPART(bp->b_dev) != RAW_PART) {
1719 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1720 blocknum += pp->p_offset;
1721 }
1722
1723 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1724 (int) blocknum));
1725
1726 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1727 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1728
1729 /* *THIS* is where we adjust what block we're going to...
1730 * but DO NOT TOUCH bp->b_blkno!!! */
1731 raid_addr = blocknum;
1732
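		/* num_blocks is the transfer length in whole sectors; pb is 1
		 * if the request ends part-way into a sector; sum is the
		 * first sector past the end of the request, used below to
		 * reject transfers that run past totalSectors or that
		 * overflow. */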
1733 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1734 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1735 sum = raid_addr + num_blocks + pb;
1736 if (1 || rf_debugKernelAccess) {
1737 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1738 (int) raid_addr, (int) sum, (int) num_blocks,
1739 (int) pb, (int) bp->b_resid));
1740 }
1741 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1742 || (sum < num_blocks) || (sum < pb)) {
1743 bp->b_error = ENOSPC;
1744 bp->b_flags |= B_ERROR;
1745 bp->b_resid = bp->b_bcount;
1746 biodone(bp);
1747 RF_LOCK_MUTEX(raidPtr->mutex);
1748 continue;
1749 }
1750 /*
1751 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1752 */
1753
1754 if (bp->b_bcount & raidPtr->sectorMask) {
1755 bp->b_error = EINVAL;
1756 bp->b_flags |= B_ERROR;
1757 bp->b_resid = bp->b_bcount;
1758 biodone(bp);
1759 RF_LOCK_MUTEX(raidPtr->mutex);
1760 continue;
1761
1762 }
1763 db1_printf(("Calling DoAccess..\n"));
1764
1765
1766 RF_LOCK_MUTEX(raidPtr->mutex);
1767 raidPtr->openings--;
1768 RF_UNLOCK_MUTEX(raidPtr->mutex);
1769
1770 /*
1771 * Everything is async.
1772 */
1773 do_async = 1;
1774
1775 disk_busy(&rs->sc_dkdev);
1776
1777 /* XXX we're still at splbio() here... do we *really*
1778 need to be? */
1779
1780 /* don't ever condition on bp->b_flags & B_WRITE.
1781 * always condition on B_READ instead */
1782
1783 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1784 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1785 do_async, raid_addr, num_blocks,
1786 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
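
		/* rf_DoAccess() runs asynchronously here: completion of each
		 * component I/O is reported through KernelWakeupFunc() below,
		 * which puts the finished request on the iodone queue and
		 * wakes the raidio thread. */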
1787
1788 RF_LOCK_MUTEX(raidPtr->mutex);
1789 }
1790 RF_UNLOCK_MUTEX(raidPtr->mutex);
1791 }
1792
1793
1794
1795
1796 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1797
1798 int
1799 rf_DispatchKernelIO(queue, req)
1800 RF_DiskQueue_t *queue;
1801 RF_DiskQueueData_t *req;
1802 {
1803 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1804 struct buf *bp;
1805 struct raidbuf *raidbp = NULL;
1806
1807 req->queue = queue;
1808
1809 #if DIAGNOSTIC
1810 if (queue->raidPtr->raidid >= numraid) {
1811 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1812 numraid);
1813 panic("Invalid Unit number in rf_DispatchKernelIO");
1814 }
1815 #endif
1816
1817 bp = req->bp;
1818 #if 1
1819 /* XXX when there is a physical disk failure, someone is passing us a
1820 * buffer that contains old stuff!! Attempt to deal with this problem
1821 * without taking a performance hit... (not sure where the real bug
1822 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1823
1824 if (bp->b_flags & B_ERROR) {
1825 bp->b_flags &= ~B_ERROR;
1826 }
1827 if (bp->b_error != 0) {
1828 bp->b_error = 0;
1829 }
1830 #endif
1831 raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1832
1833 /*
1834 * context for raidiodone
1835 */
1836 raidbp->rf_obp = bp;
1837 raidbp->req = req;
1838
1839 LIST_INIT(&raidbp->rf_buf.b_dep);
1840
1841 switch (req->type) {
1842 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1843 /* XXX need to do something extra here.. */
1844 /* I'm leaving this in, as I've never actually seen it used,
1845 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
1847 queue->numOutstanding++;
1848
1849 /* XXX need to glue the original buffer into this?? */
1850
1851 KernelWakeupFunc(&raidbp->rf_buf);
1852 break;
1853
1854 case RF_IO_TYPE_READ:
1855 case RF_IO_TYPE_WRITE:
1856
1857 if (req->tracerec) {
1858 RF_ETIMER_START(req->tracerec->timer);
1859 }
1860 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1861 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1862 req->sectorOffset, req->numSector,
1863 req->buf, KernelWakeupFunc, (void *) req,
1864 queue->raidPtr->logBytesPerSector, req->b_proc);
1865
1866 if (rf_debugKernelAccess) {
1867 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1868 (long) bp->b_blkno));
1869 }
1870 queue->numOutstanding++;
1871 queue->last_deq_sector = req->sectorOffset;
1872 /* acc wouldn't have been let in if there were any pending
1873 * reqs at any other priority */
1874 queue->curPriority = req->priority;
1875
1876 db1_printf(("Going for %c to unit %d row %d col %d\n",
1877 req->type, queue->raidPtr->raidid,
1878 queue->row, queue->col));
1879 db1_printf(("sector %d count %d (%d bytes) %d\n",
1880 (int) req->sectorOffset, (int) req->numSector,
1881 (int) (req->numSector <<
1882 queue->raidPtr->logBytesPerSector),
1883 (int) queue->raidPtr->logBytesPerSector));
1884 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1885 raidbp->rf_buf.b_vp->v_numoutput++;
1886 }
1887 VOP_STRATEGY(&raidbp->rf_buf);
1888
1889 break;
1890
1891 default:
1892 panic("bad req->type in rf_DispatchKernelIO");
1893 }
1894 db1_printf(("Exiting from DispatchKernelIO\n"));
1895
1896 return (0);
1897 }
/* this is the callback function associated with an I/O invoked from
1899 kernel code.
1900 */
1901 static void
1902 KernelWakeupFunc(vbp)
1903 struct buf *vbp;
1904 {
1905 RF_DiskQueueData_t *req = NULL;
1906 RF_DiskQueue_t *queue;
1907 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1908 struct buf *bp;
1909 int s;
1910
1911 s = splbio();
1912 db1_printf(("recovering the request queue:\n"));
1913 req = raidbp->req;
1914
1915 bp = raidbp->rf_obp;
1916
1917 queue = (RF_DiskQueue_t *) req->queue;
1918
1919 if (raidbp->rf_buf.b_flags & B_ERROR) {
1920 bp->b_flags |= B_ERROR;
1921 bp->b_error = raidbp->rf_buf.b_error ?
1922 raidbp->rf_buf.b_error : EIO;
1923 }
1924
1925 /* XXX methinks this could be wrong... */
1926 #if 1
1927 bp->b_resid = raidbp->rf_buf.b_resid;
1928 #endif
1929
1930 if (req->tracerec) {
1931 RF_ETIMER_STOP(req->tracerec->timer);
1932 RF_ETIMER_EVAL(req->tracerec->timer);
1933 RF_LOCK_MUTEX(rf_tracing_mutex);
1934 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1935 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1936 req->tracerec->num_phys_ios++;
1937 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1938 }
1939 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1940
1941 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1942 * ballistic, and mark the component as hosed... */
1943
1944 if (bp->b_flags & B_ERROR) {
1945 /* Mark the disk as dead */
1946 /* but only mark it once... */
1947 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1948 rf_ds_optimal) {
1949 printf("raid%d: IO Error. Marking %s as failed.\n",
1950 queue->raidPtr->raidid,
1951 queue->raidPtr->Disks[queue->row][queue->col].devname);
1952 queue->raidPtr->Disks[queue->row][queue->col].status =
1953 rf_ds_failed;
1954 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1955 queue->raidPtr->numFailures++;
1956 queue->raidPtr->numNewFailures++;
1957 } else { /* Disk is already dead... */
1958 /* printf("Disk already marked as dead!\n"); */
1959 }
1960
1961 }
1962
1963 pool_put(&raidframe_cbufpool, raidbp);
1964
1965 /* Fill in the error value */
1966
1967 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1968
1969 simple_lock(&queue->raidPtr->iodone_lock);
1970
1971 /* Drop this one on the "finished" queue... */
1972 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1973
1974 /* Let the raidio thread know there is work to be done. */
1975 wakeup(&(queue->raidPtr->iodone));
1976
1977 simple_unlock(&queue->raidPtr->iodone_lock);
1978
1979 splx(s);
1980 }
1981
1982
1983
1984 /*
1985 * initialize a buf structure for doing an I/O in the kernel.
1986 */
1987 static void
1988 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1989 logBytesPerSector, b_proc)
1990 struct buf *bp;
1991 struct vnode *b_vp;
1992 unsigned rw_flag;
1993 dev_t dev;
1994 RF_SectorNum_t startSect;
1995 RF_SectorCount_t numSect;
1996 caddr_t buf;
1997 void (*cbFunc) (struct buf *);
1998 void *cbArg;
1999 int logBytesPerSector;
2000 struct proc *b_proc;
2001 {
2002 /* bp->b_flags = B_PHYS | rw_flag; */
2003 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2004 bp->b_bcount = numSect << logBytesPerSector;
2005 bp->b_bufsize = bp->b_bcount;
2006 bp->b_error = 0;
2007 bp->b_dev = dev;
2008 bp->b_data = buf;
2009 bp->b_blkno = startSect;
2010 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2011 if (bp->b_bcount == 0) {
2012 panic("bp->b_bcount is zero in InitBP!!");
2013 }
2014 bp->b_proc = b_proc;
2015 bp->b_iodone = cbFunc;
2016 bp->b_vp = b_vp;
2017
2018 }
2019
2020 static void
2021 raidgetdefaultlabel(raidPtr, rs, lp)
2022 RF_Raid_t *raidPtr;
2023 struct raid_softc *rs;
2024 struct disklabel *lp;
2025 {
2026 memset(lp, 0, sizeof(*lp));
2027
2028 /* fabricate a label... */
2029 lp->d_secperunit = raidPtr->totalSectors;
2030 lp->d_secsize = raidPtr->bytesPerSector;
2031 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2032 lp->d_ntracks = 4 * raidPtr->numCol;
2033 lp->d_ncylinders = raidPtr->totalSectors /
2034 (lp->d_nsectors * lp->d_ntracks);
2035 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2036
2037 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2038 lp->d_type = DTYPE_RAID;
2039 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2040 lp->d_rpm = 3600;
2041 lp->d_interleave = 1;
2042 lp->d_flags = 0;
2043
2044 lp->d_partitions[RAW_PART].p_offset = 0;
2045 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2046 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2047 lp->d_npartitions = RAW_PART + 1;
2048
2049 lp->d_magic = DISKMAGIC;
2050 lp->d_magic2 = DISKMAGIC;
2051 	lp->d_checksum = dkcksum(lp);	/* checksum the label we just built */
2052
2053 }
2054 /*
2055 * Read the disklabel from the raid device. If one is not present, fake one
2056 * up.
2057 */
2058 static void
2059 raidgetdisklabel(dev)
2060 dev_t dev;
2061 {
2062 int unit = raidunit(dev);
2063 struct raid_softc *rs = &raid_softc[unit];
2064 char *errstring;
2065 struct disklabel *lp = rs->sc_dkdev.dk_label;
2066 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2067 RF_Raid_t *raidPtr;
2068
2069 db1_printf(("Getting the disklabel...\n"));
2070
2071 memset(clp, 0, sizeof(*clp));
2072
2073 raidPtr = raidPtrs[unit];
2074
2075 raidgetdefaultlabel(raidPtr, rs, lp);
2076
2077 /*
2078 * Call the generic disklabel extraction routine.
2079 */
2080 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2081 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2082 if (errstring)
2083 raidmakedisklabel(rs);
2084 else {
2085 int i;
2086 struct partition *pp;
2087
2088 		/*
2089 		 * Sanity check whether the found disklabel is valid.
2090 		 *
2091 		 * This is necessary since the total size of the raid device
2092 		 * may vary when the interleave is changed even though exactly
2093 		 * the same components are used, and an old disklabel may be
2094 		 * used if one is found.
2095 		 */
2096 if (lp->d_secperunit != rs->sc_size)
2097 			printf("raid%d: WARNING: %s: "
2098 			    "total number of sectors in disklabel (%d) != "
2099 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
2100 			    lp->d_secperunit, (long) rs->sc_size);
2101 for (i = 0; i < lp->d_npartitions; i++) {
2102 pp = &lp->d_partitions[i];
2103 if (pp->p_offset + pp->p_size > rs->sc_size)
2104 printf("raid%d: WARNING: %s: end of partition `%c' "
2105 "exceeds the size of raid (%ld)\n",
2106 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2107 }
2108 }
2109
2110 }
2111 /*
2112 * Take care of things one might want to take care of in the event
2113 * that a disklabel isn't present.
2114 */
2115 static void
2116 raidmakedisklabel(rs)
2117 struct raid_softc *rs;
2118 {
2119 struct disklabel *lp = rs->sc_dkdev.dk_label;
2120 db1_printf(("Making a label..\n"));
2121
2122 /*
2123 * For historical reasons, if there's no disklabel present
2124 * the raw partition must be marked FS_BSDFFS.
2125 */
2126
2127 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2128
2129 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2130
2131 lp->d_checksum = dkcksum(lp);
2132 }
2133 /*
2134  * Look up the provided name in the filesystem. If the file exists,
2135 * is a valid block device, and isn't being used by anyone else,
2136 * set *vpp to the file's vnode.
2137 * You'll find the original of this in ccd.c
2138 */
2139 int
2140 raidlookup(path, p, vpp)
2141 char *path;
2142 struct proc *p;
2143 struct vnode **vpp; /* result */
2144 {
2145 struct nameidata nd;
2146 struct vnode *vp;
2147 struct vattr va;
2148 int error;
2149
2150 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2151 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2152 return (error);
2153 }
2154 vp = nd.ni_vp;
2155 if (vp->v_usecount > 1) {
2156 VOP_UNLOCK(vp, 0);
2157 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2158 return (EBUSY);
2159 }
2160 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2161 VOP_UNLOCK(vp, 0);
2162 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2163 return (error);
2164 }
2165 /* XXX: eventually we should handle VREG, too. */
2166 if (va.va_type != VBLK) {
2167 VOP_UNLOCK(vp, 0);
2168 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2169 return (ENOTBLK);
2170 }
2171 VOP_UNLOCK(vp, 0);
2172 *vpp = vp;
2173 return (0);
2174 }
2175 /*
2176 * Wait interruptibly for an exclusive lock.
2177 *
2178 * XXX
2179 * Several drivers do this; it should be abstracted and made MP-safe.
2180 * (Hmm... where have we seen this warning before :-> GO )
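 *
 * A typical caller (sketch only) would do:
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... muck with the softc ...
 *	raidunlock(rs);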
2181 */
2182 static int
2183 raidlock(rs)
2184 struct raid_softc *rs;
2185 {
2186 int error;
2187
2188 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2189 rs->sc_flags |= RAIDF_WANTED;
2190 if ((error =
2191 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2192 return (error);
2193 }
2194 rs->sc_flags |= RAIDF_LOCKED;
2195 return (0);
2196 }
2197 /*
2198 * Unlock and wake up any waiters.
2199 */
2200 static void
2201 raidunlock(rs)
2202 struct raid_softc *rs;
2203 {
2204
2205 rs->sc_flags &= ~RAIDF_LOCKED;
2206 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2207 rs->sc_flags &= ~RAIDF_WANTED;
2208 wakeup(rs);
2209 }
2210 }
2211
2212
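/*
 * Each component stores a copy of its component label in a reserved area
 * at the front of the component: the label I/O below is done as a single
 * RF_COMPONENT_INFO_SIZE-byte transfer starting RF_COMPONENT_INFO_OFFSET
 * bytes into the component (see raidread_component_label() and
 * raidwrite_component_label()).  raidmarkclean() and raidmarkdirty() are
 * read-modify-write wrappers around those two routines.
 */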
2213 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2214 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2215
2216 int
2217 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2218 {
2219 RF_ComponentLabel_t clabel;
2220 raidread_component_label(dev, b_vp, &clabel);
2221 clabel.mod_counter = mod_counter;
2222 clabel.clean = RF_RAID_CLEAN;
2223 raidwrite_component_label(dev, b_vp, &clabel);
2224 return(0);
2225 }
2226
2227
2228 int
2229 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2230 {
2231 RF_ComponentLabel_t clabel;
2232 raidread_component_label(dev, b_vp, &clabel);
2233 clabel.mod_counter = mod_counter;
2234 clabel.clean = RF_RAID_DIRTY;
2235 raidwrite_component_label(dev, b_vp, &clabel);
2236 return(0);
2237 }
2238
2239 /* ARGSUSED */
2240 int
2241 raidread_component_label(dev, b_vp, clabel)
2242 dev_t dev;
2243 struct vnode *b_vp;
2244 RF_ComponentLabel_t *clabel;
2245 {
2246 struct buf *bp;
2247 const struct bdevsw *bdev;
2248 int error;
2249
2250 /* XXX should probably ensure that we don't try to do this if
2251 someone has changed rf_protected_sectors. */
2252
2253 if (b_vp == NULL) {
2254 /* For whatever reason, this component is not valid.
2255 Don't try to read a component label from it. */
2256 return(EINVAL);
2257 }
2258
2259 /* get a block of the appropriate size... */
2260 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2261 bp->b_dev = dev;
2262
2263 /* get our ducks in a row for the read */
2264 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2265 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2266 bp->b_flags |= B_READ;
2267 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2268
2269 bdev = bdevsw_lookup(bp->b_dev);
2270 	if (bdev == NULL) {
		brelse(bp);	/* don't leak the buffer we got from geteblk() */
2271 		return (ENXIO);
	}
2272 (*bdev->d_strategy)(bp);
2273
2274 error = biowait(bp);
2275
2276 if (!error) {
2277 memcpy(clabel, bp->b_data,
2278 sizeof(RF_ComponentLabel_t));
2279 }
2280
2281 brelse(bp);
2282 return(error);
2283 }
2284 /* ARGSUSED */
2285 int
2286 raidwrite_component_label(dev, b_vp, clabel)
2287 dev_t dev;
2288 struct vnode *b_vp;
2289 RF_ComponentLabel_t *clabel;
2290 {
2291 struct buf *bp;
2292 const struct bdevsw *bdev;
2293 int error;
2294
2295 /* get a block of the appropriate size... */
2296 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2297 bp->b_dev = dev;
2298
2299 /* get our ducks in a row for the write */
2300 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2301 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2302 bp->b_flags |= B_WRITE;
2303 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2304
2305 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2306
2307 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2308
2309 bdev = bdevsw_lookup(bp->b_dev);
2310 	if (bdev == NULL) {
		brelse(bp);	/* don't leak the buffer we got from geteblk() */
2311 		return (ENXIO);
	}
2312 (*bdev->d_strategy)(bp);
2313 error = biowait(bp);
2314 brelse(bp);
2315 if (error) {
2316 #if 1
2317 printf("Failed to write RAID component info!\n");
2318 #endif
2319 }
2320
2321 return(error);
2322 }
2323
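/*
 * Mark the component labels of all non-failed components (and any in-use
 * spares) dirty, bumping the modification counter.  This gets called when
 * a set is configured (see rf_auto_config_set() below); the labels are
 * only marked clean again once the parity is known good, so a label still
 * marked dirty later indicates the set was not shut down cleanly.
 */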
2324 void
2325 rf_markalldirty(raidPtr)
2326 RF_Raid_t *raidPtr;
2327 {
2328 RF_ComponentLabel_t clabel;
2329 int sparecol;
2330 int r,c;
2331 int i,j;
2332 	int srow = -1, scol = -1;
2333
2334 raidPtr->mod_counter++;
2335 for (r = 0; r < raidPtr->numRow; r++) {
2336 for (c = 0; c < raidPtr->numCol; c++) {
2337 /* we don't want to touch (at all) a disk that has
2338 failed */
2339 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2340 raidread_component_label(
2341 raidPtr->Disks[r][c].dev,
2342 raidPtr->raid_cinfo[r][c].ci_vp,
2343 &clabel);
2344 if (clabel.status == rf_ds_spared) {
2345 /* XXX do something special...
2346 but whatever you do, don't
2347 try to access it!! */
2348 } else {
2349 raidmarkdirty(
2350 raidPtr->Disks[r][c].dev,
2351 raidPtr->raid_cinfo[r][c].ci_vp,
2352 raidPtr->mod_counter);
2353 }
2354 }
2355 }
2356 }
2357
2358 for( c = 0; c < raidPtr->numSpare ; c++) {
2359 sparecol = raidPtr->numCol + c;
2360 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2361 			/*
2363 			 * we claim this disk is "optimal" if it's
2364 			 * rf_ds_used_spare, as that means it should be
2365 			 * directly substitutable for the disk it replaced.
2366 			 * We note that too...
2368 			 */
2369
2370 for(i=0;i<raidPtr->numRow;i++) {
2371 for(j=0;j<raidPtr->numCol;j++) {
2372 if ((raidPtr->Disks[i][j].spareRow ==
2373 0) &&
2374 (raidPtr->Disks[i][j].spareCol ==
2375 sparecol)) {
2376 srow = i;
2377 scol = j;
2378 break;
2379 }
2380 }
2381 }
2382
2383 raidread_component_label(
2384 raidPtr->Disks[0][sparecol].dev,
2385 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2386 &clabel);
2387 /* make sure status is noted */
2388
2389 raid_init_component_label(raidPtr, &clabel);
2390
2391 clabel.row = srow;
2392 clabel.column = scol;
2393 /* Note: we *don't* change status from rf_ds_used_spare
2394 to rf_ds_optimal */
2395 /* clabel.status = rf_ds_optimal; */
2396
2397 raidmarkdirty(raidPtr->Disks[0][sparecol].dev,
2398 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2399 raidPtr->mod_counter);
2400 }
2401 }
2402 }
2403
2404
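/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with a freshly bumped modification counter.  On a final update
 * (final == RF_FINAL_COMPONENT_UPDATE) with parity known good, the labels
 * are also marked clean.
 */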
2405 void
2406 rf_update_component_labels(raidPtr, final)
2407 RF_Raid_t *raidPtr;
2408 int final;
2409 {
2410 RF_ComponentLabel_t clabel;
2411 int sparecol;
2412 int r,c;
2413 int i,j;
2414 int srow, scol;
2415
2416 srow = -1;
2417 scol = -1;
2418
2419 /* XXX should do extra checks to make sure things really are clean,
2420 rather than blindly setting the clean bit... */
2421
2422 raidPtr->mod_counter++;
2423
2424 for (r = 0; r < raidPtr->numRow; r++) {
2425 for (c = 0; c < raidPtr->numCol; c++) {
2426 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2427 raidread_component_label(
2428 raidPtr->Disks[r][c].dev,
2429 raidPtr->raid_cinfo[r][c].ci_vp,
2430 &clabel);
2431 /* make sure status is noted */
2432 clabel.status = rf_ds_optimal;
2433 /* bump the counter */
2434 clabel.mod_counter = raidPtr->mod_counter;
2435
2436 raidwrite_component_label(
2437 raidPtr->Disks[r][c].dev,
2438 raidPtr->raid_cinfo[r][c].ci_vp,
2439 &clabel);
2440 if (final == RF_FINAL_COMPONENT_UPDATE) {
2441 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2442 raidmarkclean(
2443 raidPtr->Disks[r][c].dev,
2444 raidPtr->raid_cinfo[r][c].ci_vp,
2445 raidPtr->mod_counter);
2446 }
2447 }
2448 }
2449 /* else we don't touch it.. */
2450 }
2451 }
2452
2453 for( c = 0; c < raidPtr->numSpare ; c++) {
2454 sparecol = raidPtr->numCol + c;
2455 /* Need to ensure that the reconstruct actually completed! */
2456 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2457 			/*
2459 			 * we claim this disk is "optimal" if it's
2460 			 * rf_ds_used_spare, as that means it should be
2461 			 * directly substitutable for the disk it replaced.
2462 			 * We note that too...
2464 			 */
2465
2466 for(i=0;i<raidPtr->numRow;i++) {
2467 for(j=0;j<raidPtr->numCol;j++) {
2468 if ((raidPtr->Disks[i][j].spareRow ==
2469 0) &&
2470 (raidPtr->Disks[i][j].spareCol ==
2471 sparecol)) {
2472 srow = i;
2473 scol = j;
2474 break;
2475 }
2476 }
2477 }
2478
2479 /* XXX shouldn't *really* need this... */
2480 raidread_component_label(
2481 raidPtr->Disks[0][sparecol].dev,
2482 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2483 &clabel);
2484 /* make sure status is noted */
2485
2486 raid_init_component_label(raidPtr, &clabel);
2487
2488 clabel.mod_counter = raidPtr->mod_counter;
2489 clabel.row = srow;
2490 clabel.column = scol;
2491 clabel.status = rf_ds_optimal;
2492
2493 raidwrite_component_label(
2494 raidPtr->Disks[0][sparecol].dev,
2495 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2496 &clabel);
2497 if (final == RF_FINAL_COMPONENT_UPDATE) {
2498 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2499 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2500 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2501 raidPtr->mod_counter);
2502 }
2503 }
2504 }
2505 }
2506 }
2507
2508 void
2509 rf_close_component(raidPtr, vp, auto_configured)
2510 RF_Raid_t *raidPtr;
2511 struct vnode *vp;
2512 int auto_configured;
2513 {
2514 struct proc *p;
2515
2516 p = raidPtr->engine_thread;
2517
2518 if (vp != NULL) {
2519 if (auto_configured == 1) {
2520 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2521 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2522 vput(vp);
2523
2524 } else {
2525 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2526 }
2527 }
2528 }
2529
2530
2531 void
2532 rf_UnconfigureVnodes(raidPtr)
2533 RF_Raid_t *raidPtr;
2534 {
2535 int r,c;
2536 struct vnode *vp;
2537 int acd;
2538
2539
2540 /* We take this opportunity to close the vnodes like we should.. */
2541
2542 for (r = 0; r < raidPtr->numRow; r++) {
2543 for (c = 0; c < raidPtr->numCol; c++) {
2544 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2545 acd = raidPtr->Disks[r][c].auto_configured;
2546 rf_close_component(raidPtr, vp, acd);
2547 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2548 raidPtr->Disks[r][c].auto_configured = 0;
2549 }
2550 }
2551 for (r = 0; r < raidPtr->numSpare; r++) {
2552 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2553 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2554 rf_close_component(raidPtr, vp, acd);
2555 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2556 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2557 }
2558 }
2559
2560
2561 void
2562 rf_ReconThread(req)
2563 struct rf_recon_req *req;
2564 {
2565 int s;
2566 RF_Raid_t *raidPtr;
2567
2568 s = splbio();
2569 raidPtr = (RF_Raid_t *) req->raidPtr;
2570 raidPtr->recon_in_progress = 1;
2571
2572 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2573 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2574
2575 /* XXX get rid of this! we don't need it at all.. */
2576 RF_Free(req, sizeof(*req));
2577
2578 raidPtr->recon_in_progress = 0;
2579 splx(s);
2580
2581 /* That's all... */
2582 kthread_exit(0); /* does not return */
2583 }
2584
2585 void
2586 rf_RewriteParityThread(raidPtr)
2587 RF_Raid_t *raidPtr;
2588 {
2589 int retcode;
2590 int s;
2591
2592 raidPtr->parity_rewrite_in_progress = 1;
2593 s = splbio();
2594 retcode = rf_RewriteParity(raidPtr);
2595 splx(s);
2596 if (retcode) {
2597 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2598 } else {
2599 		/* set the clean bit! If we shut down correctly,
2600 		   the clean bit on each component label will get
2601 		   set */
2602 raidPtr->parity_good = RF_RAID_CLEAN;
2603 }
2604 raidPtr->parity_rewrite_in_progress = 0;
2605
2606 /* Anyone waiting for us to stop? If so, inform them... */
2607 if (raidPtr->waitShutdown) {
2608 wakeup(&raidPtr->parity_rewrite_in_progress);
2609 }
2610
2611 /* That's all... */
2612 kthread_exit(0); /* does not return */
2613 }
2614
2615
2616 void
2617 rf_CopybackThread(raidPtr)
2618 RF_Raid_t *raidPtr;
2619 {
2620 int s;
2621
2622 raidPtr->copyback_in_progress = 1;
2623 s = splbio();
2624 rf_CopybackReconstructedData(raidPtr);
2625 splx(s);
2626 raidPtr->copyback_in_progress = 0;
2627
2628 /* That's all... */
2629 kthread_exit(0); /* does not return */
2630 }
2631
2632
2633 void
2634 rf_ReconstructInPlaceThread(req)
2635 struct rf_recon_req *req;
2636 {
2637 int retcode;
2638 int s;
2639 RF_Raid_t *raidPtr;
2640
2641 s = splbio();
2642 raidPtr = req->raidPtr;
2643 raidPtr->recon_in_progress = 1;
2644 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2645 RF_Free(req, sizeof(*req));
2646 raidPtr->recon_in_progress = 0;
2647 splx(s);
2648
2649 /* That's all... */
2650 kthread_exit(0); /* does not return */
2651 }
2652
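/*
 * Scan all disk devices in the system for RAIDframe components: read each
 * disk's disklabel and, for every partition of type FS_RAID, try to read
 * a component label.  Components whose labels look reasonable (see
 * rf_reasonable_label()) are collected into an RF_AutoConfig_t list,
 * which rf_create_auto_sets() later sorts into configuration sets.
 */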
2653 RF_AutoConfig_t *
2654 rf_find_raid_components()
2655 {
2656 struct vnode *vp;
2657 struct disklabel label;
2658 struct device *dv;
2659 dev_t dev;
2660 int bmajor;
2661 int error;
2662 int i;
2663 int good_one;
2664 RF_ComponentLabel_t *clabel;
2665 RF_AutoConfig_t *ac_list;
2666 RF_AutoConfig_t *ac;
2667
2668
2669 /* initialize the AutoConfig list */
2670 ac_list = NULL;
2671
2672 /* we begin by trolling through *all* the devices on the system */
2673
2674 for (dv = alldevs.tqh_first; dv != NULL;
2675 dv = dv->dv_list.tqe_next) {
2676
2677 /* we are only interested in disks... */
2678 if (dv->dv_class != DV_DISK)
2679 continue;
2680
2681 /* we don't care about floppies... */
2682 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2683 continue;
2684 }
2685
2686 		/* we don't care about CDs... */
2687 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2688 continue;
2689 }
2690
2691 /* hdfd is the Atari/Hades floppy driver */
2692 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2693 continue;
2694 }
2695 /* fdisa is the Atari/Milan floppy driver */
2696 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2697 continue;
2698 }
2699
2700 /* need to find the device_name_to_block_device_major stuff */
2701 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2702
2703 /* get a vnode for the raw partition of this disk */
2704
2705 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2706 if (bdevvp(dev, &vp))
2707 panic("RAID can't alloc vnode");
2708
2709 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2710
2711 if (error) {
2712 /* "Who cares." Continue looking
2713 for something that exists*/
2714 vput(vp);
2715 continue;
2716 }
2717
2718 /* Ok, the disk exists. Go get the disklabel. */
2719 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2720 FREAD, NOCRED, 0);
2721 if (error) {
2722 /*
2723 * XXX can't happen - open() would
2724 * have errored out (or faked up one)
2725 */
2726 printf("can't get label for dev %s%c (%d)!?!?\n",
2727 dv->dv_xname, 'a' + RAW_PART, error);
2728 }
2729
2730 /* don't need this any more. We'll allocate it again
2731 a little later if we really do... */
2732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2733 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2734 vput(vp);
2735
2736 for (i=0; i < label.d_npartitions; i++) {
2737 /* We only support partitions marked as RAID */
2738 if (label.d_partitions[i].p_fstype != FS_RAID)
2739 continue;
2740
2741 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2742 if (bdevvp(dev, &vp))
2743 panic("RAID can't alloc vnode");
2744
2745 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2746 if (error) {
2747 /* Whatever... */
2748 vput(vp);
2749 continue;
2750 }
2751
2752 good_one = 0;
2753
2754 clabel = (RF_ComponentLabel_t *)
2755 malloc(sizeof(RF_ComponentLabel_t),
2756 M_RAIDFRAME, M_NOWAIT);
2757 if (clabel == NULL) {
2758 /* XXX CLEANUP HERE */
2759 printf("RAID auto config: out of memory!\n");
2760 return(NULL); /* XXX probably should panic? */
2761 }
2762
2763 if (!raidread_component_label(dev, vp, clabel)) {
2764 /* Got the label. Does it look reasonable? */
2765 if (rf_reasonable_label(clabel) &&
2766 (clabel->partitionSize <=
2767 label.d_partitions[i].p_size)) {
2768 #if DEBUG
2769 printf("Component on: %s%c: %d\n",
2770 dv->dv_xname, 'a'+i,
2771 label.d_partitions[i].p_size);
2772 rf_print_component_label(clabel);
2773 #endif
2774 /* if it's reasonable, add it,
2775 else ignore it. */
2776 ac = (RF_AutoConfig_t *)
2777 malloc(sizeof(RF_AutoConfig_t),
2778 M_RAIDFRAME,
2779 M_NOWAIT);
2780 if (ac == NULL) {
2781 /* XXX should panic?? */
2782 return(NULL);
2783 }
2784
2785 sprintf(ac->devname, "%s%c",
2786 dv->dv_xname, 'a'+i);
2787 ac->dev = dev;
2788 ac->vp = vp;
2789 ac->clabel = clabel;
2790 ac->next = ac_list;
2791 ac_list = ac;
2792 good_one = 1;
2793 }
2794 }
2795 if (!good_one) {
2796 /* cleanup */
2797 free(clabel, M_RAIDFRAME);
2798 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2799 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2800 vput(vp);
2801 }
2802 }
2803 }
2804 return(ac_list);
2805 }
2806
2807 static int
2808 rf_reasonable_label(clabel)
2809 RF_ComponentLabel_t *clabel;
2810 {
2811
2812 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2813 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2814 ((clabel->clean == RF_RAID_CLEAN) ||
2815 (clabel->clean == RF_RAID_DIRTY)) &&
2816 clabel->row >=0 &&
2817 clabel->column >= 0 &&
2818 clabel->num_rows > 0 &&
2819 clabel->num_columns > 0 &&
2820 clabel->row < clabel->num_rows &&
2821 clabel->column < clabel->num_columns &&
2822 clabel->blockSize > 0 &&
2823 clabel->numBlocks > 0) {
2824 /* label looks reasonable enough... */
2825 return(1);
2826 }
2827 return(0);
2828 }
2829
2830
2831 #if DEBUG
2832 void
2833 rf_print_component_label(clabel)
2834 RF_ComponentLabel_t *clabel;
2835 {
2836 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2837 clabel->row, clabel->column,
2838 clabel->num_rows, clabel->num_columns);
2839 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2840 clabel->version, clabel->serial_number,
2841 clabel->mod_counter);
2842 printf(" Clean: %s Status: %d\n",
2843 clabel->clean ? "Yes" : "No", clabel->status );
2844 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2845 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2846 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2847 (char) clabel->parityConfig, clabel->blockSize,
2848 clabel->numBlocks);
2849 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2850 printf(" Contains root partition: %s\n",
2851 clabel->root_partition ? "Yes" : "No" );
2852 printf(" Last configured as: raid%d\n", clabel->last_unit );
2853 #if 0
2854 printf(" Config order: %d\n", clabel->config_order);
2855 #endif
2856
2857 }
2858 #endif
2859
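/*
 * Sort the discovered components into configuration sets: components
 * whose labels are mutually consistent (as judged by rf_does_it_fit())
 * end up on the same RF_ConfigSet_t, and anything that doesn't match an
 * existing set starts a new one.
 */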
2860 RF_ConfigSet_t *
2861 rf_create_auto_sets(ac_list)
2862 RF_AutoConfig_t *ac_list;
2863 {
2864 RF_AutoConfig_t *ac;
2865 RF_ConfigSet_t *config_sets;
2866 RF_ConfigSet_t *cset;
2867 RF_AutoConfig_t *ac_next;
2868
2869
2870 config_sets = NULL;
2871
2872 /* Go through the AutoConfig list, and figure out which components
2873 belong to what sets. */
2874 ac = ac_list;
2875 while(ac!=NULL) {
2876 /* we're going to putz with ac->next, so save it here
2877 for use at the end of the loop */
2878 ac_next = ac->next;
2879
2880 if (config_sets == NULL) {
2881 /* will need at least this one... */
2882 config_sets = (RF_ConfigSet_t *)
2883 malloc(sizeof(RF_ConfigSet_t),
2884 M_RAIDFRAME, M_NOWAIT);
2885 if (config_sets == NULL) {
2886 panic("rf_create_auto_sets: No memory!");
2887 }
2888 /* this one is easy :) */
2889 config_sets->ac = ac;
2890 config_sets->next = NULL;
2891 config_sets->rootable = 0;
2892 ac->next = NULL;
2893 } else {
2894 /* which set does this component fit into? */
2895 cset = config_sets;
2896 while(cset!=NULL) {
2897 if (rf_does_it_fit(cset, ac)) {
2898 /* looks like it matches... */
2899 ac->next = cset->ac;
2900 cset->ac = ac;
2901 break;
2902 }
2903 cset = cset->next;
2904 }
2905 if (cset==NULL) {
2906 /* didn't find a match above... new set..*/
2907 cset = (RF_ConfigSet_t *)
2908 malloc(sizeof(RF_ConfigSet_t),
2909 M_RAIDFRAME, M_NOWAIT);
2910 if (cset == NULL) {
2911 panic("rf_create_auto_sets: No memory!");
2912 }
2913 cset->ac = ac;
2914 ac->next = NULL;
2915 cset->next = config_sets;
2916 cset->rootable = 0;
2917 config_sets = cset;
2918 }
2919 }
2920 ac = ac_next;
2921 }
2922
2923
2924 return(config_sets);
2925 }
2926
2927 static int
2928 rf_does_it_fit(cset, ac)
2929 RF_ConfigSet_t *cset;
2930 RF_AutoConfig_t *ac;
2931 {
2932 RF_ComponentLabel_t *clabel1, *clabel2;
2933
2934 /* If this one matches the *first* one in the set, that's good
2935 enough, since the other members of the set would have been
2936 through here too... */
2937 /* note that we are not checking partitionSize here..
2938
2939 Note that we are also not checking the mod_counters here.
2940 	   If everything else matches except the mod_counter, that's
2941 good enough for this test. We will deal with the mod_counters
2942 a little later in the autoconfiguration process.
2943
2944 (clabel1->mod_counter == clabel2->mod_counter) &&
2945
2946 The reason we don't check for this is that failed disks
2947 will have lower modification counts. If those disks are
2948 not added to the set they used to belong to, then they will
2949 form their own set, which may result in 2 different sets,
2950 for example, competing to be configured at raid0, and
2951 perhaps competing to be the root filesystem set. If the
2952 wrong ones get configured, or both attempt to become /,
2953 	   weird behaviour and/or serious lossage will occur. Thus we
2954 need to bring them into the fold here, and kick them out at
2955 a later point.
2956
2957 */
2958
2959 clabel1 = cset->ac->clabel;
2960 clabel2 = ac->clabel;
2961 if ((clabel1->version == clabel2->version) &&
2962 (clabel1->serial_number == clabel2->serial_number) &&
2963 (clabel1->num_rows == clabel2->num_rows) &&
2964 (clabel1->num_columns == clabel2->num_columns) &&
2965 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2966 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2967 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2968 (clabel1->parityConfig == clabel2->parityConfig) &&
2969 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2970 (clabel1->blockSize == clabel2->blockSize) &&
2971 (clabel1->numBlocks == clabel2->numBlocks) &&
2972 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2973 (clabel1->root_partition == clabel2->root_partition) &&
2974 (clabel1->last_unit == clabel2->last_unit) &&
2975 (clabel1->config_order == clabel2->config_order)) {
2976 		/* if it gets here, it almost *has* to be a match */
2977 } else {
2978 /* it's not consistent with somebody in the set..
2979 punt */
2980 return(0);
2981 }
2982 /* all was fine.. it must fit... */
2983 return(1);
2984 }
2985
2986 int
2987 rf_have_enough_components(cset)
2988 RF_ConfigSet_t *cset;
2989 {
2990 RF_AutoConfig_t *ac;
2991 RF_AutoConfig_t *auto_config;
2992 RF_ComponentLabel_t *clabel;
2993 int r,c;
2994 int num_rows;
2995 int num_cols;
2996 int num_missing;
2997 int mod_counter;
2998 int mod_counter_found;
2999 int even_pair_failed;
3000 char parity_type;
3001
3002
3003 /* check to see that we have enough 'live' components
3004 of this set. If so, we can configure it if necessary */
3005
3006 num_rows = cset->ac->clabel->num_rows;
3007 num_cols = cset->ac->clabel->num_columns;
3008 parity_type = cset->ac->clabel->parityConfig;
3009
3010 /* XXX Check for duplicate components!?!?!? */
3011
3012 /* Determine what the mod_counter is supposed to be for this set. */
3013
3014 mod_counter_found = 0;
3015 mod_counter = 0;
3016 ac = cset->ac;
3017 while(ac!=NULL) {
3018 if (mod_counter_found==0) {
3019 mod_counter = ac->clabel->mod_counter;
3020 mod_counter_found = 1;
3021 } else {
3022 if (ac->clabel->mod_counter > mod_counter) {
3023 mod_counter = ac->clabel->mod_counter;
3024 }
3025 }
3026 ac = ac->next;
3027 }
3028
3029 num_missing = 0;
3030 auto_config = cset->ac;
3031
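	/*
	 * Walk every (row, column) position and check that some component
	 * with the winning mod_counter claims it.  RAID 1 components are
	 * treated pairwise (column 2n mirrors column 2n+1): losing both
	 * halves of a pair is fatal, while for other parity configurations
	 * we simply count the missing positions.
	 */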
3032 for(r=0; r<num_rows; r++) {
3033 even_pair_failed = 0;
3034 for(c=0; c<num_cols; c++) {
3035 ac = auto_config;
3036 while(ac!=NULL) {
3037 if ((ac->clabel->row == r) &&
3038 (ac->clabel->column == c) &&
3039 (ac->clabel->mod_counter == mod_counter)) {
3040 /* it's this one... */
3041 #if DEBUG
3042 printf("Found: %s at %d,%d\n",
3043 ac->devname,r,c);
3044 #endif
3045 break;
3046 }
3047 ac=ac->next;
3048 }
3049 if (ac==NULL) {
3050 /* Didn't find one here! */
3051 /* special case for RAID 1, especially
3052 where there are more than 2
3053 components (where RAIDframe treats
3054 things a little differently :( ) */
3055 if (parity_type == '1') {
3056 if (c%2 == 0) { /* even component */
3057 even_pair_failed = 1;
3058 } else { /* odd component. If
3059 we're failed, and
3060 so is the even
3061 component, it's
3062 "Good Night, Charlie" */
3063 if (even_pair_failed == 1) {
3064 return(0);
3065 }
3066 }
3067 } else {
3068 /* normal accounting */
3069 num_missing++;
3070 }
3071 }
3072 if ((parity_type == '1') && (c%2 == 1)) {
3073 				/* Just finished the odd half of a mirror
3074 				   pair without bailing.. reset the
3075 				   even_pair_failed flag, and go on to the
				   next pair.... */
3076 even_pair_failed = 0;
3077 }
3078 }
3079 }
3080
3081 clabel = cset->ac->clabel;
3082
3083 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3084 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3085 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3086 /* XXX this needs to be made *much* more general */
3087 /* Too many failures */
3088 return(0);
3089 }
3090 /* otherwise, all is well, and we've got enough to take a kick
3091 at autoconfiguring this set */
3092 return(1);
3093 }
3094
3095 void
3096 rf_create_configuration(ac,config,raidPtr)
3097 RF_AutoConfig_t *ac;
3098 RF_Config_t *config;
3099 RF_Raid_t *raidPtr;
3100 {
3101 RF_ComponentLabel_t *clabel;
3102 int i;
3103
3104 clabel = ac->clabel;
3105
3106 /* 1. Fill in the common stuff */
3107 config->numRow = clabel->num_rows;
3108 config->numCol = clabel->num_columns;
3109 config->numSpare = 0; /* XXX should this be set here? */
3110 config->sectPerSU = clabel->sectPerSU;
3111 config->SUsPerPU = clabel->SUsPerPU;
3112 config->SUsPerRU = clabel->SUsPerRU;
3113 config->parityConfig = clabel->parityConfig;
3114 /* XXX... */
3115 strcpy(config->diskQueueType,"fifo");
3116 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3117 config->layoutSpecificSize = 0; /* XXX ?? */
3118
3119 while(ac!=NULL) {
3120 		/* row/col values will be in range due to the checks
3121 		   in rf_reasonable_label() */
3122 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3123 ac->devname);
3124 ac = ac->next;
3125 }
3126
3127 for(i=0;i<RF_MAXDBGV;i++) {
3128 		config->debugVars[i][0] = '\0';
3129 }
3130 }
3131
3132 int
3133 rf_set_autoconfig(raidPtr, new_value)
3134 RF_Raid_t *raidPtr;
3135 int new_value;
3136 {
3137 RF_ComponentLabel_t clabel;
3138 struct vnode *vp;
3139 dev_t dev;
3140 int row, column;
3141 int sparecol;
3142
3143 raidPtr->autoconfigure = new_value;
3144 for(row=0; row<raidPtr->numRow; row++) {
3145 for(column=0; column<raidPtr->numCol; column++) {
3146 if (raidPtr->Disks[row][column].status ==
3147 rf_ds_optimal) {
3148 dev = raidPtr->Disks[row][column].dev;
3149 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3150 raidread_component_label(dev, vp, &clabel);
3151 clabel.autoconfigure = new_value;
3152 raidwrite_component_label(dev, vp, &clabel);
3153 }
3154 }
3155 }
3156 for(column = 0; column < raidPtr->numSpare ; column++) {
3157 sparecol = raidPtr->numCol + column;
3158 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
3159 dev = raidPtr->Disks[0][sparecol].dev;
3160 vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
3161 raidread_component_label(dev, vp, &clabel);
3162 clabel.autoconfigure = new_value;
3163 raidwrite_component_label(dev, vp, &clabel);
3164 }
3165 }
3166 return(new_value);
3167 }
3168
3169 int
3170 rf_set_rootpartition(raidPtr, new_value)
3171 RF_Raid_t *raidPtr;
3172 int new_value;
3173 {
3174 RF_ComponentLabel_t clabel;
3175 struct vnode *vp;
3176 dev_t dev;
3177 int row, column;
3178 int sparecol;
3179
3180 raidPtr->root_partition = new_value;
3181 for(row=0; row<raidPtr->numRow; row++) {
3182 for(column=0; column<raidPtr->numCol; column++) {
3183 if (raidPtr->Disks[row][column].status ==
3184 rf_ds_optimal) {
3185 dev = raidPtr->Disks[row][column].dev;
3186 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3187 raidread_component_label(dev, vp, &clabel);
3188 clabel.root_partition = new_value;
3189 raidwrite_component_label(dev, vp, &clabel);
3190 }
3191 }
3192 }
3193 for(column = 0; column < raidPtr->numSpare ; column++) {
3194 sparecol = raidPtr->numCol + column;
3195 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
3196 dev = raidPtr->Disks[0][sparecol].dev;
3197 vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
3198 raidread_component_label(dev, vp, &clabel);
3199 clabel.root_partition = new_value;
3200 raidwrite_component_label(dev, vp, &clabel);
3201 }
3202 }
3203 return(new_value);
3204 }
3205
3206 void
3207 rf_release_all_vps(cset)
3208 RF_ConfigSet_t *cset;
3209 {
3210 RF_AutoConfig_t *ac;
3211
3212 ac = cset->ac;
3213 while(ac!=NULL) {
3214 /* Close the vp, and give it back */
3215 if (ac->vp) {
3216 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3217 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3218 vput(ac->vp);
3219 ac->vp = NULL;
3220 }
3221 ac = ac->next;
3222 }
3223 }
3224
3225
3226 void
3227 rf_cleanup_config_set(cset)
3228 RF_ConfigSet_t *cset;
3229 {
3230 RF_AutoConfig_t *ac;
3231 RF_AutoConfig_t *next_ac;
3232
3233 ac = cset->ac;
3234 while(ac!=NULL) {
3235 next_ac = ac->next;
3236 /* nuke the label */
3237 free(ac->clabel, M_RAIDFRAME);
3238 /* cleanup the config structure */
3239 free(ac, M_RAIDFRAME);
3240 /* "next.." */
3241 ac = next_ac;
3242 }
3243 /* and, finally, nuke the config set */
3244 free(cset, M_RAIDFRAME);
3245 }
3246
3247
3248 void
3249 raid_init_component_label(raidPtr, clabel)
3250 RF_Raid_t *raidPtr;
3251 RF_ComponentLabel_t *clabel;
3252 {
3253 /* current version number */
3254 clabel->version = RF_COMPONENT_LABEL_VERSION;
3255 clabel->serial_number = raidPtr->serial_number;
3256 clabel->mod_counter = raidPtr->mod_counter;
3257 clabel->num_rows = raidPtr->numRow;
3258 clabel->num_columns = raidPtr->numCol;
3259 clabel->clean = RF_RAID_DIRTY; /* not clean */
3260 clabel->status = rf_ds_optimal; /* "It's good!" */
3261
3262 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3263 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3264 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3265
3266 clabel->blockSize = raidPtr->bytesPerSector;
3267 clabel->numBlocks = raidPtr->sectorsPerDisk;
3268
3269 /* XXX not portable */
3270 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3271 clabel->maxOutstanding = raidPtr->maxOutstanding;
3272 clabel->autoconfigure = raidPtr->autoconfigure;
3273 clabel->root_partition = raidPtr->root_partition;
3274 clabel->last_unit = raidPtr->raidid;
3275 clabel->config_order = raidPtr->config_order;
3276 }
3277
3278 int
3279 rf_auto_config_set(cset,unit)
3280 RF_ConfigSet_t *cset;
3281 int *unit;
3282 {
3283 RF_Raid_t *raidPtr;
3284 RF_Config_t *config;
3285 int raidID;
3286 int retcode;
3287
3288 #if DEBUG
3289 printf("RAID autoconfigure\n");
3290 #endif
3291
3292 retcode = 0;
3293 *unit = -1;
3294
3295 /* 1. Create a config structure */
3296
3297 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3298 M_RAIDFRAME,
3299 M_NOWAIT);
3300 if (config==NULL) {
3301 printf("Out of mem!?!?\n");
3302 /* XXX do something more intelligent here. */
3303 return(1);
3304 }
3305
3306 memset(config, 0, sizeof(RF_Config_t));
3307
3308 /*
3309 2. Figure out what RAID ID this one is supposed to live at
3310 See if we can get the same RAID dev that it was configured
3311 on last time..
3312 */
3313
3314 raidID = cset->ac->clabel->last_unit;
3315 if ((raidID < 0) || (raidID >= numraid)) {
3316 /* let's not wander off into lala land. */
3317 raidID = numraid - 1;
3318 }
3319 if (raidPtrs[raidID]->valid != 0) {
3320
3321 /*
3322 Nope... Go looking for an alternative...
3323 Start high so we don't immediately use raid0 if that's
3324 not taken.
3325 */
3326
3327 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3328 if (raidPtrs[raidID]->valid == 0) {
3329 /* can use this one! */
3330 break;
3331 }
3332 }
3333 }
3334
3335 	if (raidID < 0) {
3336 		/* punt... */
3337 		printf("Unable to auto configure this set!\n");
3338 		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);	/* don't leak the config structure */
3339 		return(1);
3340 	}
3341
3342 #if DEBUG
3343 printf("Configuring raid%d:\n",raidID);
3344 #endif
3345
3346 raidPtr = raidPtrs[raidID];
3347
3348 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3349 raidPtr->raidid = raidID;
3350 raidPtr->openings = RAIDOUTSTANDING;
3351
3352 /* 3. Build the configuration structure */
3353 rf_create_configuration(cset->ac, config, raidPtr);
3354
3355 /* 4. Do the configuration */
3356 retcode = rf_Configure(raidPtr, config, cset->ac);
3357
3358 if (retcode == 0) {
3359
3360 raidinit(raidPtrs[raidID]);
3361
3362 rf_markalldirty(raidPtrs[raidID]);
3363 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3364 if (cset->ac->clabel->root_partition==1) {
3365 /* everything configured just fine. Make a note
3366 that this set is eligible to be root. */
3367 cset->rootable = 1;
3368 /* XXX do this here? */
3369 raidPtrs[raidID]->root_partition = 1;
3370 }
3371 }
3372
3373 /* 5. Cleanup */
3374 free(config, M_RAIDFRAME);
3375
3376 *unit = raidID;
3377 return(retcode);
3378 }
3379
3380 void
3381 rf_disk_unbusy(desc)
3382 RF_RaidAccessDesc_t *desc;
3383 {
3384 struct buf *bp;
3385
3386 bp = (struct buf *)desc->bp;
3387 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3388 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3389 }
3390