1 /* $NetBSD: rf_netbsdkintf.c,v 1.116 2002/01/09 04:21:43 thorpej Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.116 2002/01/09 04:21:43 thorpej Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_acctrace.h"
149 #include "rf_etimer.h"
150 #include "rf_general.h"
151 #include "rf_debugMem.h"
152 #include "rf_kintf.h"
153 #include "rf_options.h"
154 #include "rf_driver.h"
155 #include "rf_parityscan.h"
156 #include "rf_debugprint.h"
157 #include "rf_threadstuff.h"
158
159 int rf_kdebug_level = 0;
160
161 #ifdef DEBUG
162 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
163 #else /* DEBUG */
164 #define db1_printf(a) { }
165 #endif /* DEBUG */
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit(RF_Raid_t *);
184
185 void raidattach(int);
186 int raidsize(dev_t);
187 int raidopen(dev_t, int, int, struct proc *);
188 int raidclose(dev_t, int, int, struct proc *);
189 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
190 int raidwrite(dev_t, struct uio *, int);
191 int raidread(dev_t, struct uio *, int);
192 void raidstrategy(struct buf *);
193 int raiddump(dev_t, daddr_t, caddr_t, size_t);
194
195 /*
196 * Pilfered from ccd.c
197 */
198
199 struct raidbuf {
200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
201 struct buf *rf_obp; /* ptr. to original I/O buf */
202 int rf_flags; /* misc. flags */
203 RF_DiskQueueData_t *req;/* the request that this was part of.. */
204 };
205
206 /* component buffer pool */
207 struct pool raidframe_cbufpool;
208
209 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
210 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
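/*
 * These wrappers hide the component buffer pool: the softc argument is
 * currently unused, and since pool_get() is called with PR_NOWAIT,
 * RAIDGETBUF() can return NULL when the pool is exhausted.
 */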
211
212 /* XXX Not sure if the following should be replacing the raidPtrs above,
213 or if it should be used in conjunction with that...
214 */
215
216 struct raid_softc {
217 int sc_flags; /* flags */
218 int sc_cflags; /* configuration flags */
219 size_t sc_size; /* size of the raid device */
220 char sc_xname[20]; /* XXX external name */
221 struct disk sc_dkdev; /* generic disk device info */
222 struct buf_queue buf_queue; /* used for the device queue */
223 };
224 /* sc_flags */
225 #define RAIDF_INITED 0x01 /* unit has been initialized */
226 #define RAIDF_WLABEL 0x02 /* label area is writable */
227 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
228 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
229 #define RAIDF_LOCKED 0x80 /* unit is locked */
230
231 #define raidunit(x) DISKUNIT(x)
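/*
 * The unit number is encoded in the dev_t along with the partition;
 * DISKUNIT() extracts the unit and DISKPART() (used throughout below)
 * extracts the partition.
 */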
232 int numraid = 0;
233
234 /*
235 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
236 * Be aware that large numbers can allow the driver to consume a lot of
237 * kernel memory, especially on writes, and in degraded mode reads.
238 *
239 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
240 * a single 64K write will typically require 64K for the old data,
241 * 64K for the old parity, and 64K for the new parity, for a total
242 * of 192K (if the parity buffer is not re-used immediately).
243 * Even if it is used immediately, that's still 128K, which when multiplied
244 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
245 *
246 * Now in degraded mode, for example, a 64K read on the above setup may
247 * require data reconstruction, which will require *all* of the 4 remaining
248 * disks to participate -- 4 * 32K/disk == 128K again.
249 */
250
251 #ifndef RAIDOUTSTANDING
252 #define RAIDOUTSTANDING 6
253 #endif
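/*
 * With the default of 6 outstanding requests, the 64K-write example above
 * works out to roughly 6 * (192K + 64K), i.e. about 1.5MB of kernel memory
 * in the worst case (internal buffers plus the incoming data).
 */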
254
255 #define RAIDLABELDEV(dev) \
256 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
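/*
 * RAIDLABELDEV(dev) names the raw partition of the same unit; disklabel
 * reads and writes (see the DIOCWDINFO handling in raidioctl()) go through
 * it, so they are not subject to any partition offset.
 */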
257
258 /* declared here, and made public, for the benefit of KVM stuff.. */
259 struct raid_softc *raid_softc;
260
261 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
262 struct disklabel *);
263 static void raidgetdisklabel(dev_t);
264 static void raidmakedisklabel(struct raid_softc *);
265
266 static int raidlock(struct raid_softc *);
267 static void raidunlock(struct raid_softc *);
268
269 static void rf_markalldirty(RF_Raid_t *);
270 void rf_mountroot_hook(struct device *);
271
272 struct device *raidrootdev;
273
274 void rf_ReconThread(struct rf_recon_req *);
275 /* XXX what I want is: */
276 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
277 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
278 void rf_CopybackThread(RF_Raid_t *raidPtr);
279 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
280 void rf_buildroothack(void *);
281
282 RF_AutoConfig_t *rf_find_raid_components(void);
283 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
284 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
285 static int rf_reasonable_label(RF_ComponentLabel_t *);
286 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
287 int rf_set_autoconfig(RF_Raid_t *, int);
288 int rf_set_rootpartition(RF_Raid_t *, int);
289 void rf_release_all_vps(RF_ConfigSet_t *);
290 void rf_cleanup_config_set(RF_ConfigSet_t *);
291 int rf_have_enough_components(RF_ConfigSet_t *);
292 int rf_auto_config_set(RF_ConfigSet_t *, int *);
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place.
296 Note that this is overridden by having
297 RAID_AUTOCONFIG as an option in the
298 kernel config file. */
299
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 /* Initialize the component buffer pool. */
331 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
332 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
333
334 rc = rf_mutex_init(&rf_sparet_wait_mutex);
335 if (rc) {
336 RF_PANIC();
337 }
338
339 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
340
341 for (i = 0; i < num; i++)
342 raidPtrs[i] = NULL;
343 rc = rf_BootRaidframe();
344 if (rc == 0)
345 printf("Kernelized RAIDframe activated\n");
346 else
347 panic("Serious error booting RAID!!\n");
348
349 /* put together some data structures, like the CCD device does. This
350 * lets us lock the device and what-not when it gets opened. */
351
352 raid_softc = (struct raid_softc *)
353 malloc(num * sizeof(struct raid_softc),
354 M_RAIDFRAME, M_NOWAIT);
355 if (raid_softc == NULL) {
356 printf("WARNING: no memory for RAIDframe driver\n");
357 return;
358 }
359
360 memset(raid_softc, 0, num * sizeof(struct raid_softc));
361
362 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
363 M_RAIDFRAME, M_NOWAIT);
364 if (raidrootdev == NULL) {
365 panic("No memory for RAIDframe driver!!?!?!\n");
366 }
367
368 for (raidID = 0; raidID < num; raidID++) {
369 BUFQ_INIT(&raid_softc[raidID].buf_queue);
370
371 raidrootdev[raidID].dv_class = DV_DISK;
372 raidrootdev[raidID].dv_cfdata = NULL;
373 raidrootdev[raidID].dv_unit = raidID;
374 raidrootdev[raidID].dv_parent = NULL;
375 raidrootdev[raidID].dv_flags = 0;
376 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
377
378 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
379 (RF_Raid_t *));
380 if (raidPtrs[raidID] == NULL) {
381 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
382 numraid = raidID;
383 return;
384 }
385 }
386
387 #ifdef RAID_AUTOCONFIG
388 raidautoconfig = 1;
389 #endif
390
391 if (raidautoconfig) {
392 /* 1. locate all RAID components on the system */
393
394 #if DEBUG
395 printf("Searching for raid components...\n");
396 #endif
397 ac_list = rf_find_raid_components();
398
399 /* 2. sort them into their respective sets */
400
401 config_sets = rf_create_auto_sets(ac_list);
402
403 /* 3. evaluate each set and configure the valid ones
404 This gets done in rf_buildroothack() */
405
406 /* schedule the creation of the thread to do the
407 "/ on RAID" stuff */
408
409 kthread_create(rf_buildroothack,config_sets);
410
411 #if 0
412 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
413 #endif
414 }
415
416 }
417
418 void
419 rf_buildroothack(arg)
420 void *arg;
421 {
422 RF_ConfigSet_t *config_sets = arg;
423 RF_ConfigSet_t *cset;
424 RF_ConfigSet_t *next_cset;
425 int retcode;
426 int raidID;
427 int rootID;
428 int num_root;
429
430 rootID = 0;
431 num_root = 0;
432 cset = config_sets;
433 while(cset != NULL ) {
434 next_cset = cset->next;
435 if (rf_have_enough_components(cset) &&
436 cset->ac->clabel->autoconfigure==1) {
437 retcode = rf_auto_config_set(cset,&raidID);
438 if (!retcode) {
439 if (cset->rootable) {
440 rootID = raidID;
441 num_root++;
442 }
443 } else {
444 /* The autoconfig didn't work :( */
445 #if DEBUG
446 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
447 #endif
448 rf_release_all_vps(cset);
449 }
450 } else {
451 /* we're not autoconfiguring this set...
452 release the associated resources */
453 rf_release_all_vps(cset);
454 }
455 /* cleanup */
456 rf_cleanup_config_set(cset);
457 cset = next_cset;
458 }
459 if (boothowto & RB_ASKNAME) {
460 /* We don't auto-config... */
461 } else {
462 /* They didn't ask, and we found something bootable... */
463
464 if (num_root == 1) {
465 booted_device = &raidrootdev[rootID];
466 } else if (num_root > 1) {
467 /* we can't guess.. require the user to answer... */
468 boothowto |= RB_ASKNAME;
469 }
470 }
471 }
472
473
474 int
475 raidsize(dev)
476 dev_t dev;
477 {
478 struct raid_softc *rs;
479 struct disklabel *lp;
480 int part, unit, omask, size;
481
482 unit = raidunit(dev);
483 if (unit >= numraid)
484 return (-1);
485 rs = &raid_softc[unit];
486
487 if ((rs->sc_flags & RAIDF_INITED) == 0)
488 return (-1);
489
490 part = DISKPART(dev);
491 omask = rs->sc_dkdev.dk_openmask & (1 << part);
492 lp = rs->sc_dkdev.dk_label;
493
494 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
495 return (-1);
496
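	/*
	 * raidsize() backs swap/dump size queries, so only partitions
	 * marked FS_SWAP report a size; everything else answers -1.
	 */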
497 if (lp->d_partitions[part].p_fstype != FS_SWAP)
498 size = -1;
499 else
500 size = lp->d_partitions[part].p_size *
501 (lp->d_secsize / DEV_BSIZE);
502
503 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
504 return (-1);
505
506 return (size);
507
508 }
509
510 int
511 raiddump(dev, blkno, va, size)
512 dev_t dev;
513 daddr_t blkno;
514 caddr_t va;
515 size_t size;
516 {
517 /* Not implemented. */
518 return ENXIO;
519 }
520 /* ARGSUSED */
521 int
522 raidopen(dev, flags, fmt, p)
523 dev_t dev;
524 int flags, fmt;
525 struct proc *p;
526 {
527 int unit = raidunit(dev);
528 struct raid_softc *rs;
529 struct disklabel *lp;
530 int part, pmask;
531 int error = 0;
532
533 if (unit >= numraid)
534 return (ENXIO);
535 rs = &raid_softc[unit];
536
537 if ((error = raidlock(rs)) != 0)
538 return (error);
539 lp = rs->sc_dkdev.dk_label;
540
541 part = DISKPART(dev);
542 pmask = (1 << part);
543
544 db1_printf(("Opening raid device number: %d partition: %d\n",
545 unit, part));
546
547
548 if ((rs->sc_flags & RAIDF_INITED) &&
549 (rs->sc_dkdev.dk_openmask == 0))
550 raidgetdisklabel(dev);
551
552 /* make sure that this partition exists */
553
554 if (part != RAW_PART) {
555 db1_printf(("Not a raw partition..\n"));
556 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
557 ((part >= lp->d_npartitions) ||
558 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
559 error = ENXIO;
560 raidunlock(rs);
561 db1_printf(("Bailing out...\n"));
562 return (error);
563 }
564 }
565 /* Prevent this unit from being unconfigured while open. */
566 switch (fmt) {
567 case S_IFCHR:
568 rs->sc_dkdev.dk_copenmask |= pmask;
569 break;
570
571 case S_IFBLK:
572 rs->sc_dkdev.dk_bopenmask |= pmask;
573 break;
574 }
575
576 if ((rs->sc_dkdev.dk_openmask == 0) &&
577 ((rs->sc_flags & RAIDF_INITED) != 0)) {
578 /* First one... mark things as dirty... Note that we *MUST*
579 have done a configure before this. I DO NOT WANT TO BE
580 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
581 THAT THEY BELONG TOGETHER!!!!! */
582 /* XXX should check to see if we're only open for reading
583 here... If so, we needn't do this, but then need some
584 other way of keeping track of what's happened.. */
585
586 rf_markalldirty( raidPtrs[unit] );
587 }
588
589
590 rs->sc_dkdev.dk_openmask =
591 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
592
593 raidunlock(rs);
594
595 return (error);
596
597
598 }
599 /* ARGSUSED */
600 int
601 raidclose(dev, flags, fmt, p)
602 dev_t dev;
603 int flags, fmt;
604 struct proc *p;
605 {
606 int unit = raidunit(dev);
607 struct raid_softc *rs;
608 int error = 0;
609 int part;
610
611 if (unit >= numraid)
612 return (ENXIO);
613 rs = &raid_softc[unit];
614
615 if ((error = raidlock(rs)) != 0)
616 return (error);
617
618 part = DISKPART(dev);
619
620 /* ...that much closer to allowing unconfiguration... */
621 switch (fmt) {
622 case S_IFCHR:
623 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
624 break;
625
626 case S_IFBLK:
627 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
628 break;
629 }
630 rs->sc_dkdev.dk_openmask =
631 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
632
633 if ((rs->sc_dkdev.dk_openmask == 0) &&
634 ((rs->sc_flags & RAIDF_INITED) != 0)) {
635 /* Last one... the device is not unconfigured yet.
636 Device shutdown has taken care of setting the
637 clean bits if RAIDF_INITED is not set;
638 here we mark things as clean... */
639 #if 0
640 printf("Last one on raid%d. Updating status.\n",unit);
641 #endif
642 rf_update_component_labels(raidPtrs[unit],
643 RF_FINAL_COMPONENT_UPDATE);
644 if (doing_shutdown) {
645 /* last one, and we're going down, so
646 lights out for this RAID set too. */
647 error = rf_Shutdown(raidPtrs[unit]);
648
649 /* It's no longer initialized... */
650 rs->sc_flags &= ~RAIDF_INITED;
651
652 /* Detach the disk. */
653 disk_detach(&rs->sc_dkdev);
654 }
655 }
656
657 raidunlock(rs);
658 return (0);
659
660 }
661
662 void
663 raidstrategy(bp)
664 struct buf *bp;
665 {
666 int s;
667
668 unsigned int raidID = raidunit(bp->b_dev);
669 RF_Raid_t *raidPtr;
670 struct raid_softc *rs = &raid_softc[raidID];
671 struct disklabel *lp;
672 int wlabel;
673
674 if ((rs->sc_flags & RAIDF_INITED) ==0) {
675 bp->b_error = ENXIO;
676 bp->b_flags |= B_ERROR;
677 bp->b_resid = bp->b_bcount;
678 biodone(bp);
679 return;
680 }
681 if (raidID >= numraid || !raidPtrs[raidID]) {
682 bp->b_error = ENODEV;
683 bp->b_flags |= B_ERROR;
684 bp->b_resid = bp->b_bcount;
685 biodone(bp);
686 return;
687 }
688 raidPtr = raidPtrs[raidID];
689 if (!raidPtr->valid) {
690 bp->b_error = ENODEV;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 if (bp->b_bcount == 0) {
697 db1_printf(("b_bcount is zero..\n"));
698 biodone(bp);
699 return;
700 }
701 lp = rs->sc_dkdev.dk_label;
702
703 /*
704 * Do bounds checking and adjust transfer. If there's an
705 * error, the bounds check will flag that for us.
706 */
707
708 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
709 if (DISKPART(bp->b_dev) != RAW_PART)
710 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
711 db1_printf(("Bounds check failed!!:%d %d\n",
712 (int) bp->b_blkno, (int) wlabel));
713 biodone(bp);
714 return;
715 }
716 s = splbio();
717
718 bp->b_resid = 0;
719
720 /* stuff it onto our queue */
721 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
722
723 raidstart(raidPtrs[raidID]);
724
725 splx(s);
726 }
727 /* ARGSUSED */
728 int
729 raidread(dev, uio, flags)
730 dev_t dev;
731 struct uio *uio;
732 int flags;
733 {
734 int unit = raidunit(dev);
735 struct raid_softc *rs;
736 int part;
737
738 if (unit >= numraid)
739 return (ENXIO);
740 rs = &raid_softc[unit];
741
742 if ((rs->sc_flags & RAIDF_INITED) == 0)
743 return (ENXIO);
744 part = DISKPART(dev);
745
746 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
747
748 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
749
750 }
751 /* ARGSUSED */
752 int
753 raidwrite(dev, uio, flags)
754 dev_t dev;
755 struct uio *uio;
756 int flags;
757 {
758 int unit = raidunit(dev);
759 struct raid_softc *rs;
760
761 if (unit >= numraid)
762 return (ENXIO);
763 rs = &raid_softc[unit];
764
765 if ((rs->sc_flags & RAIDF_INITED) == 0)
766 return (ENXIO);
767 db1_printf(("raidwrite\n"));
768 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
769
770 }
771
772 int
773 raidioctl(dev, cmd, data, flag, p)
774 dev_t dev;
775 u_long cmd;
776 caddr_t data;
777 int flag;
778 struct proc *p;
779 {
780 int unit = raidunit(dev);
781 int error = 0;
782 int part, pmask;
783 struct raid_softc *rs;
784 RF_Config_t *k_cfg, *u_cfg;
785 RF_Raid_t *raidPtr;
786 RF_RaidDisk_t *diskPtr;
787 RF_AccTotals_t *totals;
788 RF_DeviceConfig_t *d_cfg, **ucfgp;
789 u_char *specific_buf;
790 int retcode = 0;
791 int row;
792 int column;
793 struct rf_recon_req *rrcopy, *rr;
794 RF_ComponentLabel_t *clabel;
795 RF_ComponentLabel_t ci_label;
796 RF_ComponentLabel_t **clabel_ptr;
797 RF_SingleComponent_t *sparePtr,*componentPtr;
798 RF_SingleComponent_t hot_spare;
799 RF_SingleComponent_t component;
800 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
801 int i, j, d;
802 #ifdef __HAVE_OLD_DISKLABEL
803 struct disklabel newlabel;
804 #endif
805
806 if (unit >= numraid)
807 return (ENXIO);
808 rs = &raid_softc[unit];
809 raidPtr = raidPtrs[unit];
810
811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
812 (int) DISKPART(dev), (int) unit, (int) cmd));
813
814 /* Must be open for writes for these commands... */
815 switch (cmd) {
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 #ifdef __HAVE_OLD_DISKLABEL
819 case ODIOCWDINFO:
820 case ODIOCSDINFO:
821 #endif
822 case DIOCWLABEL:
823 if ((flag & FWRITE) == 0)
824 return (EBADF);
825 }
826
827 /* Must be initialized for these... */
828 switch (cmd) {
829 case DIOCGDINFO:
830 case DIOCSDINFO:
831 case DIOCWDINFO:
832 #ifdef __HAVE_OLD_DISKLABEL
833 case ODIOCGDINFO:
834 case ODIOCWDINFO:
835 case ODIOCSDINFO:
836 case ODIOCGDEFLABEL:
837 #endif
838 case DIOCGPART:
839 case DIOCWLABEL:
840 case DIOCGDEFLABEL:
841 case RAIDFRAME_SHUTDOWN:
842 case RAIDFRAME_REWRITEPARITY:
843 case RAIDFRAME_GET_INFO:
844 case RAIDFRAME_RESET_ACCTOTALS:
845 case RAIDFRAME_GET_ACCTOTALS:
846 case RAIDFRAME_KEEP_ACCTOTALS:
847 case RAIDFRAME_GET_SIZE:
848 case RAIDFRAME_FAIL_DISK:
849 case RAIDFRAME_COPYBACK:
850 case RAIDFRAME_CHECK_RECON_STATUS:
851 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 case RAIDFRAME_SET_COMPONENT_LABEL:
854 case RAIDFRAME_ADD_HOT_SPARE:
855 case RAIDFRAME_REMOVE_HOT_SPARE:
856 case RAIDFRAME_INIT_LABELS:
857 case RAIDFRAME_REBUILD_IN_PLACE:
858 case RAIDFRAME_CHECK_PARITY:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
863 case RAIDFRAME_SET_AUTOCONFIG:
864 case RAIDFRAME_SET_ROOT:
865 case RAIDFRAME_DELETE_COMPONENT:
866 case RAIDFRAME_INCORPORATE_HOT_SPARE:
867 if ((rs->sc_flags & RAIDF_INITED) == 0)
868 return (ENXIO);
869 }
870
871 switch (cmd) {
872
873 /* configure the system */
874 case RAIDFRAME_CONFIGURE:
875
876 if (raidPtr->valid) {
877 /* There is a valid RAID set running on this unit! */
878 printf("raid%d: Device already configured!\n",unit);
879 return(EINVAL);
880 }
881
882 /* copy-in the configuration information */
883 /* data points to a pointer to the configuration structure */
884
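		/*
		 * A hypothetical sketch of the userland side (raidctl(8) or
		 * similar), to explain the double indirection below:
		 *
		 *	RF_Config_t cfg;	(filled in by the tool)
		 *	RF_Config_t *cfgp = &cfg;
		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
		 *
		 * "data" thus holds the user's pointer, and the structure
		 * itself still has to be copied in separately.
		 */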
885 u_cfg = *((RF_Config_t **) data);
886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
887 if (k_cfg == NULL) {
888 return (ENOMEM);
889 }
890 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
891 sizeof(RF_Config_t));
892 if (retcode) {
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
895 retcode));
896 return (retcode);
897 }
898 /* allocate a buffer for the layout-specific data, and copy it
899 * in */
900 if (k_cfg->layoutSpecificSize) {
901 if (k_cfg->layoutSpecificSize > 10000) {
902 /* sanity check */
903 RF_Free(k_cfg, sizeof(RF_Config_t));
904 return (EINVAL);
905 }
906 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
907 (u_char *));
908 if (specific_buf == NULL) {
909 RF_Free(k_cfg, sizeof(RF_Config_t));
910 return (ENOMEM);
911 }
912 retcode = copyin(k_cfg->layoutSpecific,
913 (caddr_t) specific_buf,
914 k_cfg->layoutSpecificSize);
915 if (retcode) {
916 RF_Free(k_cfg, sizeof(RF_Config_t));
917 RF_Free(specific_buf,
918 k_cfg->layoutSpecificSize);
919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
920 retcode));
921 return (retcode);
922 }
923 } else
924 specific_buf = NULL;
925 k_cfg->layoutSpecific = specific_buf;
926
927 /* should do some kind of sanity check on the configuration.
928 * Store the sum of all the bytes in the last byte? */
929
930 /* configure the system */
931
932 /*
933 * Clear the entire RAID descriptor, just to make sure
934 * there is no stale data left in the case of a
935 * reconfiguration
936 */
937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
938 raidPtr->raidid = unit;
939
940 retcode = rf_Configure(raidPtr, k_cfg, NULL);
941
942 if (retcode == 0) {
943
944 /* allow this many simultaneous IO's to
945 this RAID device */
946 raidPtr->openings = RAIDOUTSTANDING;
947
948 raidinit(raidPtr);
949 rf_markalldirty(raidPtr);
950 }
951 /* free the buffers. No return code here. */
952 if (k_cfg->layoutSpecificSize) {
953 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
954 }
955 RF_Free(k_cfg, sizeof(RF_Config_t));
956
957 return (retcode);
958
959 /* shutdown the system */
960 case RAIDFRAME_SHUTDOWN:
961
962 if ((error = raidlock(rs)) != 0)
963 return (error);
964
965 /*
966 * If somebody has a partition mounted, we shouldn't
967 * shutdown.
968 */
969
970 part = DISKPART(dev);
971 pmask = (1 << part);
972 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
973 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
974 (rs->sc_dkdev.dk_copenmask & pmask))) {
975 raidunlock(rs);
976 return (EBUSY);
977 }
978
979 retcode = rf_Shutdown(raidPtr);
980
981 /* It's no longer initialized... */
982 rs->sc_flags &= ~RAIDF_INITED;
983
984 /* Detach the disk. */
985 disk_detach(&rs->sc_dkdev);
986
987 raidunlock(rs);
988
989 return (retcode);
990 case RAIDFRAME_GET_COMPONENT_LABEL:
991 clabel_ptr = (RF_ComponentLabel_t **) data;
992 /* need to read the component label for the disk indicated
993 by row,column in clabel */
994
995 /* For practice, let's get it directly from disk, rather
996 than from the in-core copy */
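		/*
		 * The caller passes a pointer to a label whose row and column
		 * fields select the component; the label read from disk is
		 * copied back out through the same pointer.  A hypothetical
		 * userland sketch:
		 *
		 *	RF_ComponentLabel_t l, *lp = &l;
		 *	l.row = 0; l.column = 1;
		 *	ioctl(fd, RAIDFRAME_GET_COMPONENT_LABEL, &lp);
		 */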
997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
998 (RF_ComponentLabel_t *));
999 if (clabel == NULL)
1000 return (ENOMEM);
1001
1002 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1003
1004 retcode = copyin( *clabel_ptr, clabel,
1005 sizeof(RF_ComponentLabel_t));
1006
1007 if (retcode) {
1008 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1009 return(retcode);
1010 }
1011
1012 row = clabel->row;
1013 column = clabel->column;
1014
1015 if ((row < 0) || (row >= raidPtr->numRow) ||
1016 (column < 0) || (column >= raidPtr->numCol +
1017 raidPtr->numSpare)) {
1018 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1019 return(EINVAL);
1020 }
1021
1022 raidread_component_label(raidPtr->Disks[row][column].dev,
1023 raidPtr->raid_cinfo[row][column].ci_vp,
1024 clabel );
1025
1026 retcode = copyout((caddr_t) clabel,
1027 (caddr_t) *clabel_ptr,
1028 sizeof(RF_ComponentLabel_t));
1029 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1030 return (retcode);
1031
1032 case RAIDFRAME_SET_COMPONENT_LABEL:
1033 clabel = (RF_ComponentLabel_t *) data;
1034
1035 /* XXX check the label for valid stuff... */
1036 /* Note that some things *should not* get modified --
1037 the user should be re-initing the labels instead of
1038 trying to patch things.
1039 */
1040
1041 printf("Got component label:\n");
1042 printf("Version: %d\n",clabel->version);
1043 printf("Serial Number: %d\n",clabel->serial_number);
1044 printf("Mod counter: %d\n",clabel->mod_counter);
1045 printf("Row: %d\n", clabel->row);
1046 printf("Column: %d\n", clabel->column);
1047 printf("Num Rows: %d\n", clabel->num_rows);
1048 printf("Num Columns: %d\n", clabel->num_columns);
1049 printf("Clean: %d\n", clabel->clean);
1050 printf("Status: %d\n", clabel->status);
1051
1052 row = clabel->row;
1053 column = clabel->column;
1054
1055 if ((row < 0) || (row >= raidPtr->numRow) ||
1056 (column < 0) || (column >= raidPtr->numCol)) {
1057 return(EINVAL);
1058 }
1059
1060 /* XXX this isn't allowed to do anything for now :-) */
1061
1062 /* XXX and before it is, we need to fill in the rest
1063 of the fields!?!?!?! */
1064 #if 0
1065 raidwrite_component_label(
1066 raidPtr->Disks[row][column].dev,
1067 raidPtr->raid_cinfo[row][column].ci_vp,
1068 clabel );
1069 #endif
1070 return (0);
1071
1072 case RAIDFRAME_INIT_LABELS:
1073 clabel = (RF_ComponentLabel_t *) data;
1074 /*
1075 we only want the serial number from
1076 the above. We get all the rest of the information
1077 from the config that was used to create this RAID
1078 set.
1079 */
1080
1081 raidPtr->serial_number = clabel->serial_number;
1082
1083 raid_init_component_label(raidPtr, &ci_label);
1084 ci_label.serial_number = clabel->serial_number;
1085
1086 for(row=0;row<raidPtr->numRow;row++) {
1087 ci_label.row = row;
1088 for(column=0;column<raidPtr->numCol;column++) {
1089 diskPtr = &raidPtr->Disks[row][column];
1090 if (!RF_DEAD_DISK(diskPtr->status)) {
1091 ci_label.partitionSize = diskPtr->partitionSize;
1092 ci_label.column = column;
1093 raidwrite_component_label(
1094 raidPtr->Disks[row][column].dev,
1095 raidPtr->raid_cinfo[row][column].ci_vp,
1096 &ci_label );
1097 }
1098 }
1099 }
1100
1101 return (retcode);
1102 case RAIDFRAME_SET_AUTOCONFIG:
1103 d = rf_set_autoconfig(raidPtr, *(int *) data);
1104 printf("New autoconfig value is: %d\n", d);
1105 *(int *) data = d;
1106 return (retcode);
1107
1108 case RAIDFRAME_SET_ROOT:
1109 d = rf_set_rootpartition(raidPtr, *(int *) data);
1110 printf("New rootpartition value is: %d\n", d);
1111 *(int *) data = d;
1112 return (retcode);
1113
1114 /* initialize all parity */
1115 case RAIDFRAME_REWRITEPARITY:
1116
1117 if (raidPtr->Layout.map->faultsTolerated == 0) {
1118 /* Parity for RAID 0 is trivially correct */
1119 raidPtr->parity_good = RF_RAID_CLEAN;
1120 return(0);
1121 }
1122
1123 if (raidPtr->parity_rewrite_in_progress == 1) {
1124 /* Re-write is already in progress! */
1125 return(EINVAL);
1126 }
1127
1128 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1129 rf_RewriteParityThread,
1130 raidPtr,"raid_parity");
1131 return (retcode);
1132
1133
1134 case RAIDFRAME_ADD_HOT_SPARE:
1135 sparePtr = (RF_SingleComponent_t *) data;
1136 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1137 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1138 return(retcode);
1139
1140 case RAIDFRAME_REMOVE_HOT_SPARE:
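		/* XXX not implemented; retcode is still zero here, so this
		   silently reports success without doing anything. */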
1141 return(retcode);
1142
1143 case RAIDFRAME_DELETE_COMPONENT:
1144 componentPtr = (RF_SingleComponent_t *)data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 retcode = rf_delete_component(raidPtr, &component);
1148 return(retcode);
1149
1150 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1151 componentPtr = (RF_SingleComponent_t *)data;
1152 memcpy( &component, componentPtr,
1153 sizeof(RF_SingleComponent_t));
1154 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1155 return(retcode);
1156
1157 case RAIDFRAME_REBUILD_IN_PLACE:
1158
1159 if (raidPtr->Layout.map->faultsTolerated == 0) {
1160 /* Can't do this on a RAID 0!! */
1161 return(EINVAL);
1162 }
1163
1164 if (raidPtr->recon_in_progress == 1) {
1165 /* a reconstruct is already in progress! */
1166 return(EINVAL);
1167 }
1168
1169 componentPtr = (RF_SingleComponent_t *) data;
1170 memcpy( &component, componentPtr,
1171 sizeof(RF_SingleComponent_t));
1172 row = component.row;
1173 column = component.column;
1174 printf("Rebuild: %d %d\n",row, column);
1175 if ((row < 0) || (row >= raidPtr->numRow) ||
1176 (column < 0) || (column >= raidPtr->numCol)) {
1177 return(EINVAL);
1178 }
1179
1180 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1181 if (rrcopy == NULL)
1182 return(ENOMEM);
1183
1184 rrcopy->raidPtr = (void *) raidPtr;
1185 rrcopy->row = row;
1186 rrcopy->col = column;
1187
1188 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1189 rf_ReconstructInPlaceThread,
1190 rrcopy,"raid_reconip");
1191 return(retcode);
1192
1193 case RAIDFRAME_GET_INFO:
1194 if (!raidPtr->valid)
1195 return (ENODEV);
1196 ucfgp = (RF_DeviceConfig_t **) data;
1197 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1198 (RF_DeviceConfig_t *));
1199 if (d_cfg == NULL)
1200 return (ENOMEM);
1201 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1202 d_cfg->rows = raidPtr->numRow;
1203 d_cfg->cols = raidPtr->numCol;
1204 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1205 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1206 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1207 return (ENOMEM);
1208 }
1209 d_cfg->nspares = raidPtr->numSpare;
1210 if (d_cfg->nspares >= RF_MAX_DISKS) {
1211 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1212 return (ENOMEM);
1213 }
1214 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1215 d = 0;
1216 for (i = 0; i < d_cfg->rows; i++) {
1217 for (j = 0; j < d_cfg->cols; j++) {
1218 d_cfg->devs[d] = raidPtr->Disks[i][j];
1219 d++;
1220 }
1221 }
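		/* Spare disks live in row 0, in the columns just past the
		   last data column, hence the hardwired row 0 below. */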
1222 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1223 d_cfg->spares[i] = raidPtr->Disks[0][j];
1224 }
1225 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1226 sizeof(RF_DeviceConfig_t));
1227 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1228
1229 return (retcode);
1230
1231 case RAIDFRAME_CHECK_PARITY:
1232 *(int *) data = raidPtr->parity_good;
1233 return (0);
1234
1235 case RAIDFRAME_RESET_ACCTOTALS:
1236 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1237 return (0);
1238
1239 case RAIDFRAME_GET_ACCTOTALS:
1240 totals = (RF_AccTotals_t *) data;
1241 *totals = raidPtr->acc_totals;
1242 return (0);
1243
1244 case RAIDFRAME_KEEP_ACCTOTALS:
1245 raidPtr->keep_acc_totals = *(int *)data;
1246 return (0);
1247
1248 case RAIDFRAME_GET_SIZE:
1249 *(int *) data = raidPtr->totalSectors;
1250 return (0);
1251
1252 /* fail a disk & optionally start reconstruction */
1253 case RAIDFRAME_FAIL_DISK:
1254
1255 if (raidPtr->Layout.map->faultsTolerated == 0) {
1256 /* Can't do this on a RAID 0!! */
1257 return(EINVAL);
1258 }
1259
1260 rr = (struct rf_recon_req *) data;
1261
1262 if (rr->row < 0 || rr->row >= raidPtr->numRow
1263 || rr->col < 0 || rr->col >= raidPtr->numCol)
1264 return (EINVAL);
1265
1266 printf("raid%d: Failing the disk: row: %d col: %d\n",
1267 unit, rr->row, rr->col);
1268
1269 /* make a copy of the recon request so that we don't rely on
1270 * the user's buffer */
1271 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1272 if (rrcopy == NULL)
1273 return(ENOMEM);
1274 bcopy(rr, rrcopy, sizeof(*rr));
1275 rrcopy->raidPtr = (void *) raidPtr;
1276
1277 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1278 rf_ReconThread,
1279 rrcopy,"raid_recon");
1280 return (0);
1281
1282 /* invoke a copyback operation after recon on whatever disk
1283 * needs it, if any */
1284 case RAIDFRAME_COPYBACK:
1285
1286 if (raidPtr->Layout.map->faultsTolerated == 0) {
1287 /* This makes no sense on a RAID 0!! */
1288 return(EINVAL);
1289 }
1290
1291 if (raidPtr->copyback_in_progress == 1) {
1292 /* Copyback is already in progress! */
1293 return(EINVAL);
1294 }
1295
1296 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1297 rf_CopybackThread,
1298 raidPtr,"raid_copyback");
1299 return (retcode);
1300
1301 /* return the percentage completion of reconstruction */
1302 case RAIDFRAME_CHECK_RECON_STATUS:
1303 if (raidPtr->Layout.map->faultsTolerated == 0) {
1304 /* This makes no sense on a RAID 0, so tell the
1305 user it's done. */
1306 *(int *) data = 100;
1307 return(0);
1308 }
1309 row = 0; /* XXX we only consider a single row... */
1310 if (raidPtr->status[row] != rf_rs_reconstructing)
1311 *(int *) data = 100;
1312 else
1313 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1314 return (0);
1315 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1316 progressInfoPtr = (RF_ProgressInfo_t **) data;
1317 row = 0; /* XXX we only consider a single row... */
1318 if (raidPtr->status[row] != rf_rs_reconstructing) {
1319 progressInfo.remaining = 0;
1320 progressInfo.completed = 100;
1321 progressInfo.total = 100;
1322 } else {
1323 progressInfo.total =
1324 raidPtr->reconControl[row]->numRUsTotal;
1325 progressInfo.completed =
1326 raidPtr->reconControl[row]->numRUsComplete;
1327 progressInfo.remaining = progressInfo.total -
1328 progressInfo.completed;
1329 }
1330 retcode = copyout((caddr_t) &progressInfo,
1331 (caddr_t) *progressInfoPtr,
1332 sizeof(RF_ProgressInfo_t));
1333 return (retcode);
1334
1335 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1336 if (raidPtr->Layout.map->faultsTolerated == 0) {
1337 /* This makes no sense on a RAID 0, so tell the
1338 user it's done. */
1339 *(int *) data = 100;
1340 return(0);
1341 }
1342 if (raidPtr->parity_rewrite_in_progress == 1) {
1343 *(int *) data = 100 *
1344 raidPtr->parity_rewrite_stripes_done /
1345 raidPtr->Layout.numStripe;
1346 } else {
1347 *(int *) data = 100;
1348 }
1349 return (0);
1350
1351 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1352 progressInfoPtr = (RF_ProgressInfo_t **) data;
1353 if (raidPtr->parity_rewrite_in_progress == 1) {
1354 progressInfo.total = raidPtr->Layout.numStripe;
1355 progressInfo.completed =
1356 raidPtr->parity_rewrite_stripes_done;
1357 progressInfo.remaining = progressInfo.total -
1358 progressInfo.completed;
1359 } else {
1360 progressInfo.remaining = 0;
1361 progressInfo.completed = 100;
1362 progressInfo.total = 100;
1363 }
1364 retcode = copyout((caddr_t) &progressInfo,
1365 (caddr_t) *progressInfoPtr,
1366 sizeof(RF_ProgressInfo_t));
1367 return (retcode);
1368
1369 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1370 if (raidPtr->Layout.map->faultsTolerated == 0) {
1371 /* This makes no sense on a RAID 0 */
1372 *(int *) data = 100;
1373 return(0);
1374 }
1375 if (raidPtr->copyback_in_progress == 1) {
1376 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1377 raidPtr->Layout.numStripe;
1378 } else {
1379 *(int *) data = 100;
1380 }
1381 return (0);
1382
1383 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1384 progressInfoPtr = (RF_ProgressInfo_t **) data;
1385 if (raidPtr->copyback_in_progress == 1) {
1386 progressInfo.total = raidPtr->Layout.numStripe;
1387 progressInfo.completed =
1388 raidPtr->copyback_stripes_done;
1389 progressInfo.remaining = progressInfo.total -
1390 progressInfo.completed;
1391 } else {
1392 progressInfo.remaining = 0;
1393 progressInfo.completed = 100;
1394 progressInfo.total = 100;
1395 }
1396 retcode = copyout((caddr_t) &progressInfo,
1397 (caddr_t) *progressInfoPtr,
1398 sizeof(RF_ProgressInfo_t));
1399 return (retcode);
1400
1401 /* the sparetable daemon calls this to wait for the kernel to
1402 * need a spare table. this ioctl does not return until a
1403 * spare table is needed. XXX -- calling mpsleep here in the
1404 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1405 * -- I should either compute the spare table in the kernel,
1406 * or have a different -- XXX XXX -- interface (a different
1407 * character device) for delivering the table -- XXX */
1408 #if 0
1409 case RAIDFRAME_SPARET_WAIT:
1410 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1411 while (!rf_sparet_wait_queue)
1412 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1413 waitreq = rf_sparet_wait_queue;
1414 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1415 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1416
1417 /* structure assignment */
1418 *((RF_SparetWait_t *) data) = *waitreq;
1419
1420 RF_Free(waitreq, sizeof(*waitreq));
1421 return (0);
1422
1423 /* wakes up a process waiting on SPARET_WAIT and puts an error
1424 * code in it that will cause the daemon to exit */
1425 case RAIDFRAME_ABORT_SPARET_WAIT:
1426 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1427 waitreq->fcol = -1;
1428 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1429 waitreq->next = rf_sparet_wait_queue;
1430 rf_sparet_wait_queue = waitreq;
1431 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1432 wakeup(&rf_sparet_wait_queue);
1433 return (0);
1434
1435 /* used by the spare table daemon to deliver a spare table
1436 * into the kernel */
1437 case RAIDFRAME_SEND_SPARET:
1438
1439 /* install the spare table */
1440 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1441
1442 /* respond to the requestor. the return status of the spare
1443 * table installation is passed in the "fcol" field */
1444 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1445 waitreq->fcol = retcode;
1446 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1447 waitreq->next = rf_sparet_resp_queue;
1448 rf_sparet_resp_queue = waitreq;
1449 wakeup(&rf_sparet_resp_queue);
1450 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1451
1452 return (retcode);
1453 #endif
1454
1455 default:
1456 break; /* fall through to the os-specific code below */
1457
1458 }
1459
1460 if (!raidPtr->valid)
1461 return (EINVAL);
1462
1463 /*
1464 * Add support for "regular" device ioctls here.
1465 */
1466
1467 switch (cmd) {
1468 case DIOCGDINFO:
1469 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1470 break;
1471 #ifdef __HAVE_OLD_DISKLABEL
1472 case ODIOCGDINFO:
1473 newlabel = *(rs->sc_dkdev.dk_label);
1474 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1475 return ENOTTY;
1476 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1477 break;
1478 #endif
1479
1480 case DIOCGPART:
1481 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1482 ((struct partinfo *) data)->part =
1483 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1484 break;
1485
1486 case DIOCWDINFO:
1487 case DIOCSDINFO:
1488 #ifdef __HAVE_OLD_DISKLABEL
1489 case ODIOCWDINFO:
1490 case ODIOCSDINFO:
1491 #endif
1492 {
1493 struct disklabel *lp;
1494 #ifdef __HAVE_OLD_DISKLABEL
1495 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1496 memset(&newlabel, 0, sizeof newlabel);
1497 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1498 lp = &newlabel;
1499 } else
1500 #endif
1501 lp = (struct disklabel *)data;
1502
1503 if ((error = raidlock(rs)) != 0)
1504 return (error);
1505
1506 rs->sc_flags |= RAIDF_LABELLING;
1507
1508 error = setdisklabel(rs->sc_dkdev.dk_label,
1509 lp, 0, rs->sc_dkdev.dk_cpulabel);
1510 if (error == 0) {
1511 if (cmd == DIOCWDINFO
1512 #ifdef __HAVE_OLD_DISKLABEL
1513 || cmd == ODIOCWDINFO
1514 #endif
1515 )
1516 error = writedisklabel(RAIDLABELDEV(dev),
1517 raidstrategy, rs->sc_dkdev.dk_label,
1518 rs->sc_dkdev.dk_cpulabel);
1519 }
1520 rs->sc_flags &= ~RAIDF_LABELLING;
1521
1522 raidunlock(rs);
1523
1524 if (error)
1525 return (error);
1526 break;
1527 }
1528
1529 case DIOCWLABEL:
1530 if (*(int *) data != 0)
1531 rs->sc_flags |= RAIDF_WLABEL;
1532 else
1533 rs->sc_flags &= ~RAIDF_WLABEL;
1534 break;
1535
1536 case DIOCGDEFLABEL:
1537 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1538 break;
1539
1540 #ifdef __HAVE_OLD_DISKLABEL
1541 case ODIOCGDEFLABEL:
1542 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1543 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1544 return ENOTTY;
1545 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1546 break;
1547 #endif
1548
1549 default:
1550 retcode = ENOTTY;
1551 }
1552 return (retcode);
1553
1554 }
1555
1556
1557 /* raidinit -- complete the rest of the initialization for the
1558 RAIDframe device. */
1559
1560
1561 static void
1562 raidinit(raidPtr)
1563 RF_Raid_t *raidPtr;
1564 {
1565 struct raid_softc *rs;
1566 int unit;
1567
1568 unit = raidPtr->raidid;
1569
1570 rs = &raid_softc[unit];
1571
1572 /* XXX should check return code first... */
1573 rs->sc_flags |= RAIDF_INITED;
1574
1575 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1576
1577 rs->sc_dkdev.dk_name = rs->sc_xname;
1578
1579 /* disk_attach actually creates space for the CPU disklabel, among
1580 * other things, so it's critical to call this *BEFORE* we try putzing
1581 * with disklabels. */
1582
1583 disk_attach(&rs->sc_dkdev);
1584
1585 /* XXX There may be a weird interaction here between this, and
1586 * protectedSectors, as used in RAIDframe. */
1587
1588 rs->sc_size = raidPtr->totalSectors;
1589
1590 }
1591
1592 /* wake up the daemon & tell it to get us a spare table
1593 * XXX
1594 * the entries in the queues should be tagged with the raidPtr
1595 * so that in the extremely rare case that two recons happen at once,
1596 * we know for which device we're requesting a spare table
1597 * XXX
1598 *
1599 * XXX This code is not currently used. GO
1600 */
1601 int
1602 rf_GetSpareTableFromDaemon(req)
1603 RF_SparetWait_t *req;
1604 {
1605 int retcode;
1606
1607 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1608 req->next = rf_sparet_wait_queue;
1609 rf_sparet_wait_queue = req;
1610 wakeup(&rf_sparet_wait_queue);
1611
1612 /* mpsleep unlocks the mutex */
1613 while (!rf_sparet_resp_queue) {
1614 tsleep(&rf_sparet_resp_queue, PRIBIO,
1615 "raidframe getsparetable", 0);
1616 }
1617 req = rf_sparet_resp_queue;
1618 rf_sparet_resp_queue = req->next;
1619 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1620
1621 retcode = req->fcol;
1622 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1623 * alloc'd */
1624 return (retcode);
1625 }
1626
1627 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1628 * bp & passes it down.
1629 * any calls originating in the kernel must use non-blocking I/O
1630 * do some extra sanity checking to return "appropriate" error values for
1631 * certain conditions (to make some standard utilities work)
1632 *
1633 * Formerly known as: rf_DoAccessKernel
1634 */
1635 void
1636 raidstart(raidPtr)
1637 RF_Raid_t *raidPtr;
1638 {
1639 RF_SectorCount_t num_blocks, pb, sum;
1640 RF_RaidAddr_t raid_addr;
1641 int retcode;
1642 struct partition *pp;
1643 daddr_t blocknum;
1644 int unit;
1645 struct raid_softc *rs;
1646 int do_async;
1647 struct buf *bp;
1648
1649 unit = raidPtr->raidid;
1650 rs = &raid_softc[unit];
1651
1652 /* quick check to see if anything has died recently */
1653 RF_LOCK_MUTEX(raidPtr->mutex);
1654 if (raidPtr->numNewFailures > 0) {
1655 rf_update_component_labels(raidPtr,
1656 RF_NORMAL_COMPONENT_UPDATE);
1657 raidPtr->numNewFailures--;
1658 }
1659 RF_UNLOCK_MUTEX(raidPtr->mutex);
1660
1661 /* Check to see if we're at the limit... */
1662 RF_LOCK_MUTEX(raidPtr->mutex);
1663 while (raidPtr->openings > 0) {
1664 RF_UNLOCK_MUTEX(raidPtr->mutex);
1665
1666 /* get the next item, if any, from the queue */
1667 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1668 /* nothing more to do */
1669 return;
1670 }
1671 BUFQ_REMOVE(&rs->buf_queue, bp);
1672
1673 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1674 * partition.. Need to make it absolute to the underlying
1675 * device.. */
1676
1677 blocknum = bp->b_blkno;
1678 if (DISKPART(bp->b_dev) != RAW_PART) {
1679 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1680 blocknum += pp->p_offset;
1681 }
1682
1683 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1684 (int) blocknum));
1685
1686 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1687 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1688
1689 /* *THIS* is where we adjust what block we're going to...
1690 * but DO NOT TOUCH bp->b_blkno!!! */
1691 raid_addr = blocknum;
1692
1693 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1694 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1695 sum = raid_addr + num_blocks + pb;
1696 if (1 || rf_debugKernelAccess) {
1697 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1698 (int) raid_addr, (int) sum, (int) num_blocks,
1699 (int) pb, (int) bp->b_resid));
1700 }
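		/* The extra comparisons below catch integer wraparound: had
		   raid_addr + num_blocks + pb overflowed, "sum" would be
		   smaller than one of its addends. */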
1701 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1702 || (sum < num_blocks) || (sum < pb)) {
1703 bp->b_error = ENOSPC;
1704 bp->b_flags |= B_ERROR;
1705 bp->b_resid = bp->b_bcount;
1706 biodone(bp);
1707 RF_LOCK_MUTEX(raidPtr->mutex);
1708 continue;
1709 }
1710 /*
1711 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1712 */
1713
1714 if (bp->b_bcount & raidPtr->sectorMask) {
1715 bp->b_error = EINVAL;
1716 bp->b_flags |= B_ERROR;
1717 bp->b_resid = bp->b_bcount;
1718 biodone(bp);
1719 RF_LOCK_MUTEX(raidPtr->mutex);
1720 continue;
1721
1722 }
1723 db1_printf(("Calling DoAccess..\n"));
1724
1725
1726 RF_LOCK_MUTEX(raidPtr->mutex);
1727 raidPtr->openings--;
1728 RF_UNLOCK_MUTEX(raidPtr->mutex);
1729
1730 /*
1731 * Everything is async.
1732 */
1733 do_async = 1;
1734
1735 disk_busy(&rs->sc_dkdev);
1736
1737 /* XXX we're still at splbio() here... do we *really*
1738 need to be? */
1739
1740 /* don't ever condition on bp->b_flags & B_WRITE.
1741 * always condition on B_READ instead */
1742
1743 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1744 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1745 do_async, raid_addr, num_blocks,
1746 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1747
1748 RF_LOCK_MUTEX(raidPtr->mutex);
1749 }
1750 RF_UNLOCK_MUTEX(raidPtr->mutex);
1751 }
1752
1753
1754
1755
1756 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1757
1758 int
1759 rf_DispatchKernelIO(queue, req)
1760 RF_DiskQueue_t *queue;
1761 RF_DiskQueueData_t *req;
1762 {
1763 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1764 struct buf *bp;
1765 struct raidbuf *raidbp = NULL;
1766 struct raid_softc *rs;
1767 int unit;
1768 int s;
1769
1770 s=0;
1771 /* s = splbio();*/ /* want to test this */
1772 /* XXX along with the vnode, we also need the softc associated with
1773 * this device.. */
1774
1775 req->queue = queue;
1776
1777 unit = queue->raidPtr->raidid;
1778
1779 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1780
1781 if (unit >= numraid) {
1782 printf("Invalid unit number: %d %d\n", unit, numraid);
1783 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1784 }
1785 rs = &raid_softc[unit];
1786
1787 bp = req->bp;
1788 #if 1
1789 /* XXX when there is a physical disk failure, someone is passing us a
1790 * buffer that contains old stuff!! Attempt to deal with this problem
1791 * without taking a performance hit... (not sure where the real bug
1792 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1793
1794 if (bp->b_flags & B_ERROR) {
1795 bp->b_flags &= ~B_ERROR;
1796 }
1797 if (bp->b_error != 0) {
1798 bp->b_error = 0;
1799 }
1800 #endif
1801 raidbp = RAIDGETBUF(rs);
1802
1803 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1804
1805 /*
1806 * context for raidiodone
1807 */
1808 raidbp->rf_obp = bp;
1809 raidbp->req = req;
1810
1811 LIST_INIT(&raidbp->rf_buf.b_dep);
1812
1813 switch (req->type) {
1814 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1815 /* XXX need to do something extra here.. */
1816 /* I'm leaving this in, as I've never actually seen it used,
1817 * and I'd like folks to report it... GO */
1818 printf("WAKEUP CALLED\n");
1819 queue->numOutstanding++;
1820
1821 /* XXX need to glue the original buffer into this?? */
1822
1823 KernelWakeupFunc(&raidbp->rf_buf);
1824 break;
1825
1826 case RF_IO_TYPE_READ:
1827 case RF_IO_TYPE_WRITE:
1828
1829 if (req->tracerec) {
1830 RF_ETIMER_START(req->tracerec->timer);
1831 }
1832 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1833 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1834 req->sectorOffset, req->numSector,
1835 req->buf, KernelWakeupFunc, (void *) req,
1836 queue->raidPtr->logBytesPerSector, req->b_proc);
1837
1838 if (rf_debugKernelAccess) {
1839 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1840 (long) bp->b_blkno));
1841 }
1842 queue->numOutstanding++;
1843 queue->last_deq_sector = req->sectorOffset;
1844 /* acc wouldn't have been let in if there were any pending
1845 * reqs at any other priority */
1846 queue->curPriority = req->priority;
1847
1848 db1_printf(("Going for %c to unit %d row %d col %d\n",
1849 req->type, unit, queue->row, queue->col));
1850 db1_printf(("sector %d count %d (%d bytes) %d\n",
1851 (int) req->sectorOffset, (int) req->numSector,
1852 (int) (req->numSector <<
1853 queue->raidPtr->logBytesPerSector),
1854 (int) queue->raidPtr->logBytesPerSector));
1855 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1856 raidbp->rf_buf.b_vp->v_numoutput++;
1857 }
1858 VOP_STRATEGY(&raidbp->rf_buf);
1859
1860 break;
1861
1862 default:
1863 panic("bad req->type in rf_DispatchKernelIO");
1864 }
1865 db1_printf(("Exiting from DispatchKernelIO\n"));
1866 /* splx(s); */ /* want to test this */
1867 return (0);
1868 }
1869 /* this is the callback function associated with an I/O invoked from
1870 kernel code.
1871 */
1872 static void
1873 KernelWakeupFunc(vbp)
1874 struct buf *vbp;
1875 {
1876 RF_DiskQueueData_t *req = NULL;
1877 RF_DiskQueue_t *queue;
1878 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1879 struct buf *bp;
1880 struct raid_softc *rs;
1881 int unit;
1882 int s;
1883
1884 s = splbio();
1885 db1_printf(("recovering the request queue:\n"));
1886 req = raidbp->req;
1887
1888 bp = raidbp->rf_obp;
1889
1890 queue = (RF_DiskQueue_t *) req->queue;
1891
1892 if (raidbp->rf_buf.b_flags & B_ERROR) {
1893 bp->b_flags |= B_ERROR;
1894 bp->b_error = raidbp->rf_buf.b_error ?
1895 raidbp->rf_buf.b_error : EIO;
1896 }
1897
1898 /* XXX methinks this could be wrong... */
1899 #if 1
1900 bp->b_resid = raidbp->rf_buf.b_resid;
1901 #endif
1902
1903 if (req->tracerec) {
1904 RF_ETIMER_STOP(req->tracerec->timer);
1905 RF_ETIMER_EVAL(req->tracerec->timer);
1906 RF_LOCK_MUTEX(rf_tracing_mutex);
1907 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1908 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1909 req->tracerec->num_phys_ios++;
1910 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1911 }
1912 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1913
1914 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1915
1916
1917 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1918 * ballistic, and mark the component as hosed... */
1919
1920 if (bp->b_flags & B_ERROR) {
1921 /* Mark the disk as dead */
1922 /* but only mark it once... */
1923 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1924 rf_ds_optimal) {
1925 printf("raid%d: IO Error. Marking %s as failed.\n",
1926 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1927 queue->raidPtr->Disks[queue->row][queue->col].status =
1928 rf_ds_failed;
1929 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1930 queue->raidPtr->numFailures++;
1931 queue->raidPtr->numNewFailures++;
1932 } else { /* Disk is already dead... */
1933 /* printf("Disk already marked as dead!\n"); */
1934 }
1935
1936 }
1937
1938 rs = &raid_softc[unit];
1939 RAIDPUTBUF(rs, raidbp);
1940
1941 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1942 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1943
1944 splx(s);
1945 }
1946
1947
1948
1949 /*
1950 * initialize a buf structure for doing an I/O in the kernel.
1951 */
1952 static void
1953 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1954 logBytesPerSector, b_proc)
1955 struct buf *bp;
1956 struct vnode *b_vp;
1957 unsigned rw_flag;
1958 dev_t dev;
1959 RF_SectorNum_t startSect;
1960 RF_SectorCount_t numSect;
1961 caddr_t buf;
1962 void (*cbFunc) (struct buf *);
1963 void *cbArg;
1964 int logBytesPerSector;
1965 struct proc *b_proc;
1966 {
1967 /* bp->b_flags = B_PHYS | rw_flag; */
1968 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1969 bp->b_bcount = numSect << logBytesPerSector;
1970 bp->b_bufsize = bp->b_bcount;
1971 bp->b_error = 0;
1972 bp->b_dev = dev;
1973 bp->b_data = buf;
1974 bp->b_blkno = startSect;
1975 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1976 if (bp->b_bcount == 0) {
1977 panic("bp->b_bcount is zero in InitBP!!\n");
1978 }
1979 bp->b_proc = b_proc;
1980 bp->b_iodone = cbFunc;
1981 bp->b_vp = b_vp;
1982
1983 }
1984
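/*
 * Build a default disklabel describing the whole RAID set as a single
 * raw partition, based on the geometry information in raidPtr.
 */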
1985 static void
1986 raidgetdefaultlabel(raidPtr, rs, lp)
1987 RF_Raid_t *raidPtr;
1988 struct raid_softc *rs;
1989 struct disklabel *lp;
1990 {
1991 db1_printf(("Building a default label...\n"));
1992 memset(lp, 0, sizeof(*lp));
1993
1994 /* fabricate a label... */
1995 lp->d_secperunit = raidPtr->totalSectors;
1996 lp->d_secsize = raidPtr->bytesPerSector;
1997 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1998 lp->d_ntracks = 4 * raidPtr->numCol;
1999 lp->d_ncylinders = raidPtr->totalSectors /
2000 (lp->d_nsectors * lp->d_ntracks);
2001 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2002
2003 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2004 lp->d_type = DTYPE_RAID;
2005 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2006 lp->d_rpm = 3600;
2007 lp->d_interleave = 1;
2008 lp->d_flags = 0;
2009
2010 lp->d_partitions[RAW_PART].p_offset = 0;
2011 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2012 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2013 lp->d_npartitions = RAW_PART + 1;
2014
2015 lp->d_magic = DISKMAGIC;
2016 lp->d_magic2 = DISKMAGIC;
2017 lp->d_checksum = dkcksum(lp);
2018
2019 }
2020 /*
2021 * Read the disklabel from the raid device. If one is not present, fake one
2022 * up.
2023 */
2024 static void
2025 raidgetdisklabel(dev)
2026 dev_t dev;
2027 {
2028 int unit = raidunit(dev);
2029 struct raid_softc *rs = &raid_softc[unit];
2030 char *errstring;
2031 struct disklabel *lp = rs->sc_dkdev.dk_label;
2032 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2033 RF_Raid_t *raidPtr;
2034
2035 db1_printf(("Getting the disklabel...\n"));
2036
2037 memset(clp, 0, sizeof(*clp));
2038
2039 raidPtr = raidPtrs[unit];
2040
2041 raidgetdefaultlabel(raidPtr, rs, lp);
2042
2043 /*
2044 * Call the generic disklabel extraction routine.
2045 */
2046 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2047 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2048 if (errstring)
2049 raidmakedisklabel(rs);
2050 else {
2051 int i;
2052 struct partition *pp;
2053
2054 /*
2055 * Sanity check whether the found disklabel is valid.
2056 *
2057 * This is necessary since the total size of the raid device
2058 * may vary when the interleave is changed even though exactly
2059 * the same components are used, and the old disklabel may be
2060 * used if one is found.
2061 */
2062 if (lp->d_secperunit != rs->sc_size)
2063 printf("WARNING: %s: "
2064 "total sector size in disklabel (%d) != "
2065 "the size of raid (%ld)\n", rs->sc_xname,
2066 lp->d_secperunit, (long) rs->sc_size);
2067 for (i = 0; i < lp->d_npartitions; i++) {
2068 pp = &lp->d_partitions[i];
2069 if (pp->p_offset + pp->p_size > rs->sc_size)
2070 printf("WARNING: %s: end of partition `%c' "
2071 "exceeds the size of raid (%ld)\n",
2072 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2073 }
2074 }
2075
2076 }
2077 /*
2078 * Take care of things one might want to take care of in the event
2079 * that a disklabel isn't present.
2080 */
2081 static void
2082 raidmakedisklabel(rs)
2083 struct raid_softc *rs;
2084 {
2085 struct disklabel *lp = rs->sc_dkdev.dk_label;
2086 db1_printf(("Making a label..\n"));
2087
2088 /*
2089 * For historical reasons, if there's no disklabel present
2090 * the raw partition must be marked FS_BSDFFS.
2091 */
2092
2093 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2094
2095 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2096
2097 lp->d_checksum = dkcksum(lp);
2098 }
2099 /*
2100 * Lookup the provided name in the filesystem. If the file exists,
2101 * is a valid block device, and isn't being used by anyone else,
2102 * set *vpp to the file's vnode.
2103 * You'll find the original of this in ccd.c
2104 */
2105 int
2106 raidlookup(path, p, vpp)
2107 char *path;
2108 struct proc *p;
2109 struct vnode **vpp; /* result */
2110 {
2111 struct nameidata nd;
2112 struct vnode *vp;
2113 struct vattr va;
2114 int error;
2115
2116 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2117 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2118 #ifdef DEBUG
2119 printf("RAIDframe: vn_open returned %d\n", error);
2120 #endif
2121 return (error);
2122 }
2123 vp = nd.ni_vp;
2124 if (vp->v_usecount > 1) {
2125 VOP_UNLOCK(vp, 0);
2126 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2127 return (EBUSY);
2128 }
2129 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2130 VOP_UNLOCK(vp, 0);
2131 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2132 return (error);
2133 }
2134 /* XXX: eventually we should handle VREG, too. */
2135 if (va.va_type != VBLK) {
2136 VOP_UNLOCK(vp, 0);
2137 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2138 return (ENOTBLK);
2139 }
2140 VOP_UNLOCK(vp, 0);
2141 *vpp = vp;
2142 return (0);
2143 }
2144 /*
2145 * Wait interruptibly for an exclusive lock.
2146 *
2147 * XXX
2148 * Several drivers do this; it should be abstracted and made MP-safe.
2149 * (Hmm... where have we seen this warning before :-> GO )
2150 */
2151 static int
2152 raidlock(rs)
2153 struct raid_softc *rs;
2154 {
2155 int error;
2156
2157 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2158 rs->sc_flags |= RAIDF_WANTED;
2159 if ((error =
2160 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2161 return (error);
2162 }
2163 rs->sc_flags |= RAIDF_LOCKED;
2164 return (0);
2165 }
2166 /*
2167 * Unlock and wake up any waiters.
2168 */
2169 static void
2170 raidunlock(rs)
2171 struct raid_softc *rs;
2172 {
2173
2174 rs->sc_flags &= ~RAIDF_LOCKED;
2175 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2176 rs->sc_flags &= ~RAIDF_WANTED;
2177 wakeup(rs);
2178 }
2179 }
2180
2181
2182 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2183 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2184
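/*
 * raidmarkclean() and raidmarkdirty(): read the component label from the
 * given component, set its modification counter to the supplied value,
 * mark it clean (resp. dirty), and write it back out.
 */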
2185 int
2186 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2187 {
2188 RF_ComponentLabel_t clabel;
2189 raidread_component_label(dev, b_vp, &clabel);
2190 clabel.mod_counter = mod_counter;
2191 clabel.clean = RF_RAID_CLEAN;
2192 raidwrite_component_label(dev, b_vp, &clabel);
2193 return(0);
2194 }
2195
2196
2197 int
2198 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2199 {
2200 RF_ComponentLabel_t clabel;
2201 raidread_component_label(dev, b_vp, &clabel);
2202 clabel.mod_counter = mod_counter;
2203 clabel.clean = RF_RAID_DIRTY;
2204 raidwrite_component_label(dev, b_vp, &clabel);
2205 return(0);
2206 }
2207
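/*
 * Read the RAIDframe component label from the reserved area
 * (RF_COMPONENT_INFO_OFFSET bytes into the component).
 */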
2208 /* ARGSUSED */
2209 int
2210 raidread_component_label(dev, b_vp, clabel)
2211 dev_t dev;
2212 struct vnode *b_vp;
2213 RF_ComponentLabel_t *clabel;
2214 {
2215 struct buf *bp;
2216 int error;
2217
2218 /* XXX should probably ensure that we don't try to do this if
2219 someone has changed rf_protected_sectors. */
2220
2221 if (b_vp == NULL) {
2222 /* For whatever reason, this component is not valid.
2223 Don't try to read a component label from it. */
2224 return(EINVAL);
2225 }
2226
2227 /* get a block of the appropriate size... */
2228 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2229 bp->b_dev = dev;
2230
2231 /* get our ducks in a row for the read */
2232 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2233 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2234 bp->b_flags |= B_READ;
2235 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2236
2237 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2238
2239 error = biowait(bp);
2240
2241 if (!error) {
2242 memcpy(clabel, bp->b_data,
2243 sizeof(RF_ComponentLabel_t));
2244 #if 0
2245 rf_print_component_label( clabel );
2246 #endif
2247 } else {
2248 #if 0
2249 printf("Failed to read RAID component label!\n");
2250 #endif
2251 }
2252
2253 brelse(bp);
2254 return(error);
2255 }
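/*
 * Write the given component label back to the reserved area on the
 * component.
 */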
2256 /* ARGSUSED */
2257 int
2258 raidwrite_component_label(dev, b_vp, clabel)
2259 dev_t dev;
2260 struct vnode *b_vp;
2261 RF_ComponentLabel_t *clabel;
2262 {
2263 struct buf *bp;
2264 int error;
2265
2266 /* get a block of the appropriate size... */
2267 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2268 bp->b_dev = dev;
2269
2270 /* get our ducks in a row for the write */
2271 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2272 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2273 bp->b_flags |= B_WRITE;
2274 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2275
2276 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2277
2278 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2279
2280 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2281 error = biowait(bp);
2282 brelse(bp);
2283 if (error) {
2284 #if 1
2285 printf("Failed to write RAID component info!\n");
2286 #endif
2287 }
2288
2289 return(error);
2290 }
2291
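/*
 * Bump the modification counter and mark the component labels of all
 * non-failed components as dirty.
 */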
2292 void
2293 rf_markalldirty(raidPtr)
2294 RF_Raid_t *raidPtr;
2295 {
2296 RF_ComponentLabel_t clabel;
2297 int r,c;
2298
2299 raidPtr->mod_counter++;
2300 for (r = 0; r < raidPtr->numRow; r++) {
2301 for (c = 0; c < raidPtr->numCol; c++) {
2302 /* we don't want to touch (at all) a disk that has
2303 failed */
2304 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2305 raidread_component_label(
2306 raidPtr->Disks[r][c].dev,
2307 raidPtr->raid_cinfo[r][c].ci_vp,
2308 &clabel);
2309 if (clabel.status == rf_ds_spared) {
2310 /* XXX do something special...
2311 but whatever you do, don't
2312 try to access it!! */
2313 } else {
2314 #if 0
2315 clabel.status =
2316 raidPtr->Disks[r][c].status;
2317 raidwrite_component_label(
2318 raidPtr->Disks[r][c].dev,
2319 raidPtr->raid_cinfo[r][c].ci_vp,
2320 &clabel);
2321 #endif
2322 raidmarkdirty(
2323 raidPtr->Disks[r][c].dev,
2324 raidPtr->raid_cinfo[r][c].ci_vp,
2325 raidPtr->mod_counter);
2326 }
2327 }
2328 }
2329 }
2330 /* printf("Component labels marked dirty.\n"); */
2331 #if 0
2332 for( c = 0; c < raidPtr->numSpare ; c++) {
2333 sparecol = raidPtr->numCol + c;
2334 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2335 /*
2336
2337 XXX this is where we get fancy and map this spare
2338 into its correct spot in the array.
2339
2340 */
2341 /*
2342
2343 we claim this disk is "optimal" if it's
2344 rf_ds_used_spare, as that means it should be
2345 directly substitutable for the disk it replaced.
2346 We note that too...
2347
2348 */
2349
2350 for(i=0;i<raidPtr->numRow;i++) {
2351 for(j=0;j<raidPtr->numCol;j++) {
2352 if ((raidPtr->Disks[i][j].spareRow ==
2353 r) &&
2354 (raidPtr->Disks[i][j].spareCol ==
2355 sparecol)) {
2356 srow = r;
2357 scol = sparecol;
2358 break;
2359 }
2360 }
2361 }
2362
2363 raidread_component_label(
2364 raidPtr->Disks[r][sparecol].dev,
2365 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2366 &clabel);
2367 /* make sure status is noted */
2368 clabel.version = RF_COMPONENT_LABEL_VERSION;
2369 clabel.mod_counter = raidPtr->mod_counter;
2370 clabel.serial_number = raidPtr->serial_number;
2371 clabel.row = srow;
2372 clabel.column = scol;
2373 clabel.num_rows = raidPtr->numRow;
2374 clabel.num_columns = raidPtr->numCol;
2375 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2376 clabel.status = rf_ds_optimal;
2377 raidwrite_component_label(
2378 raidPtr->Disks[r][sparecol].dev,
2379 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2380 &clabel);
2381 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2382 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2383 }
2384 }
2385
2386 #endif
2387 }
2388
2389
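/*
 * Write updated component labels (status and modification counter) to
 * all optimal components and any used spares. If this is the final
 * update and the parity is known to be good, the labels are marked
 * clean as well.
 */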
2390 void
2391 rf_update_component_labels(raidPtr, final)
2392 RF_Raid_t *raidPtr;
2393 int final;
2394 {
2395 RF_ComponentLabel_t clabel;
2396 int sparecol;
2397 int r,c;
2398 int i,j;
2399 int srow, scol;
2400
2401 srow = -1;
2402 scol = -1;
2403
2404 /* XXX should do extra checks to make sure things really are clean,
2405 rather than blindly setting the clean bit... */
2406
2407 raidPtr->mod_counter++;
2408
2409 for (r = 0; r < raidPtr->numRow; r++) {
2410 for (c = 0; c < raidPtr->numCol; c++) {
2411 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2412 raidread_component_label(
2413 raidPtr->Disks[r][c].dev,
2414 raidPtr->raid_cinfo[r][c].ci_vp,
2415 &clabel);
2416 /* make sure status is noted */
2417 clabel.status = rf_ds_optimal;
2418 /* bump the counter */
2419 clabel.mod_counter = raidPtr->mod_counter;
2420
2421 raidwrite_component_label(
2422 raidPtr->Disks[r][c].dev,
2423 raidPtr->raid_cinfo[r][c].ci_vp,
2424 &clabel);
2425 if (final == RF_FINAL_COMPONENT_UPDATE) {
2426 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2427 raidmarkclean(
2428 raidPtr->Disks[r][c].dev,
2429 raidPtr->raid_cinfo[r][c].ci_vp,
2430 raidPtr->mod_counter);
2431 }
2432 }
2433 }
2434 /* else we don't touch it.. */
2435 }
2436 }
2437
2438 for( c = 0; c < raidPtr->numSpare ; c++) {
2439 sparecol = raidPtr->numCol + c;
2440 /* Need to ensure that the reconstruct actually completed! */
2441 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2442 /*
2443
2444 we claim this disk is "optimal" if it's
2445 rf_ds_used_spare, as that means it should be
2446 directly substitutable for the disk it replaced.
2447 We note that too...
2448
2449 */
2450
2451 for(i=0;i<raidPtr->numRow;i++) {
2452 for(j=0;j<raidPtr->numCol;j++) {
2453 if ((raidPtr->Disks[i][j].spareRow ==
2454 0) &&
2455 (raidPtr->Disks[i][j].spareCol ==
2456 sparecol)) {
2457 srow = i;
2458 scol = j;
2459 break;
2460 }
2461 }
2462 }
2463
2464 /* XXX shouldn't *really* need this... */
2465 raidread_component_label(
2466 raidPtr->Disks[0][sparecol].dev,
2467 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2468 &clabel);
2469 /* make sure status is noted */
2470
2471 raid_init_component_label(raidPtr, &clabel);
2472
2473 clabel.mod_counter = raidPtr->mod_counter;
2474 clabel.row = srow;
2475 clabel.column = scol;
2476 clabel.status = rf_ds_optimal;
2477
2478 raidwrite_component_label(
2479 raidPtr->Disks[0][sparecol].dev,
2480 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2481 &clabel);
2482 if (final == RF_FINAL_COMPONENT_UPDATE) {
2483 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2484 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2485 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2486 raidPtr->mod_counter);
2487 }
2488 }
2489 }
2490 }
2491 /* printf("Component labels updated\n"); */
2492 }
2493
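/*
 * Close the vnode for a single component. Auto-configured components
 * are closed with NOCRED (they were opened that way during
 * auto-configuration); others are closed using the credentials of the
 * RAIDframe engine thread.
 */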
2494 void
2495 rf_close_component(raidPtr, vp, auto_configured)
2496 RF_Raid_t *raidPtr;
2497 struct vnode *vp;
2498 int auto_configured;
2499 {
2500 struct proc *p;
2501
2502 p = raidPtr->engine_thread;
2503
2504 if (vp != NULL) {
2505 if (auto_configured == 1) {
2506 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2507 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2508 vput(vp);
2509
2510 } else {
2511 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2512 }
2513 } else {
2514 printf("vnode was NULL\n");
2515 }
2516 }
2517
2518
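/*
 * Close and release the vnodes for all components and spares of the set.
 */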
2519 void
2520 rf_UnconfigureVnodes(raidPtr)
2521 RF_Raid_t *raidPtr;
2522 {
2523 int r,c;
2524 struct proc *p;
2525 struct vnode *vp;
2526 int acd;
2527
2528
2529 /* We take this opportunity to close the vnodes like we should.. */
2530
2531 p = raidPtr->engine_thread;
2532
2533 for (r = 0; r < raidPtr->numRow; r++) {
2534 for (c = 0; c < raidPtr->numCol; c++) {
2535 printf("Closing vnode for row: %d col: %d\n", r, c);
2536 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2537 acd = raidPtr->Disks[r][c].auto_configured;
2538 rf_close_component(raidPtr, vp, acd);
2539 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2540 raidPtr->Disks[r][c].auto_configured = 0;
2541 }
2542 }
2543 for (r = 0; r < raidPtr->numSpare; r++) {
2544 printf("Closing vnode for spare: %d\n", r);
2545 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2546 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2547 rf_close_component(raidPtr, vp, acd);
2548 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2549 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2550 }
2551 }
2552
2553
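/*
 * Kernel thread body: fail the indicated component and optionally
 * reconstruct its contents onto a spare, then exit.
 */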
2554 void
2555 rf_ReconThread(req)
2556 struct rf_recon_req *req;
2557 {
2558 int s;
2559 RF_Raid_t *raidPtr;
2560
2561 s = splbio();
2562 raidPtr = (RF_Raid_t *) req->raidPtr;
2563 raidPtr->recon_in_progress = 1;
2564
2565 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2566 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2567
2568 /* XXX get rid of this! we don't need it at all.. */
2569 RF_Free(req, sizeof(*req));
2570
2571 raidPtr->recon_in_progress = 0;
2572 splx(s);
2573
2574 /* That's all... */
2575 kthread_exit(0); /* does not return */
2576 }
2577
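/*
 * Kernel thread body: rewrite the parity for the entire set. On success
 * the in-core parity status is marked clean, so the component labels
 * will be marked clean at shutdown.
 */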
2578 void
2579 rf_RewriteParityThread(raidPtr)
2580 RF_Raid_t *raidPtr;
2581 {
2582 int retcode;
2583 int s;
2584
2585 raidPtr->parity_rewrite_in_progress = 1;
2586 s = splbio();
2587 retcode = rf_RewriteParity(raidPtr);
2588 splx(s);
2589 if (retcode) {
2590 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2591 } else {
2592 /* set the clean bit! If we shutdown correctly,
2593 the clean bit on each component label will get
2594 set */
2595 raidPtr->parity_good = RF_RAID_CLEAN;
2596 }
2597 raidPtr->parity_rewrite_in_progress = 0;
2598
2599 /* Anyone waiting for us to stop? If so, inform them... */
2600 if (raidPtr->waitShutdown) {
2601 wakeup(&raidPtr->parity_rewrite_in_progress);
2602 }
2603
2604 /* That's all... */
2605 kthread_exit(0); /* does not return */
2606 }
2607
2608
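/*
 * Kernel thread body: copy reconstructed data back from the spare via
 * rf_CopybackReconstructedData(), then exit.
 */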
2609 void
2610 rf_CopybackThread(raidPtr)
2611 RF_Raid_t *raidPtr;
2612 {
2613 int s;
2614
2615 raidPtr->copyback_in_progress = 1;
2616 s = splbio();
2617 rf_CopybackReconstructedData(raidPtr);
2618 splx(s);
2619 raidPtr->copyback_in_progress = 0;
2620
2621 /* That's all... */
2622 kthread_exit(0); /* does not return */
2623 }
2624
2625
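/*
 * Kernel thread body: reconstruct the indicated component in place
 * (writing directly back to that component), then exit.
 */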
2626 void
2627 rf_ReconstructInPlaceThread(req)
2628 struct rf_recon_req *req;
2629 {
2630 int retcode;
2631 int s;
2632 RF_Raid_t *raidPtr;
2633
2634 s = splbio();
2635 raidPtr = req->raidPtr;
2636 raidPtr->recon_in_progress = 1;
2637 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2638 RF_Free(req, sizeof(*req));
2639 raidPtr->recon_in_progress = 0;
2640 splx(s);
2641
2642 /* That's all... */
2643 kthread_exit(0); /* does not return */
2644 }
2645
2646 void
2647 rf_mountroot_hook(dev)
2648 struct device *dev;
2649 {
2650
2651 }
2652
2653
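/*
 * Scan all disk devices in the system for partitions of type FS_RAID,
 * read the RAIDframe component label from each one, and return a list of
 * RF_AutoConfig_t structures for the components that look reasonable.
 */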
2654 RF_AutoConfig_t *
2655 rf_find_raid_components()
2656 {
2657 struct devnametobdevmaj *dtobdm;
2658 struct vnode *vp;
2659 struct disklabel label;
2660 struct device *dv;
2661 char *cd_name;
2662 dev_t dev;
2663 int error;
2664 int i;
2665 int good_one;
2666 RF_ComponentLabel_t *clabel;
2667 RF_AutoConfig_t *ac_list;
2668 RF_AutoConfig_t *ac;
2669
2670
2671 /* initialize the AutoConfig list */
2672 ac_list = NULL;
2673
2674 /* we begin by trolling through *all* the devices on the system */
2675
2676 for (dv = alldevs.tqh_first; dv != NULL;
2677 dv = dv->dv_list.tqe_next) {
2678
2679 /* we are only interested in disks... */
2680 if (dv->dv_class != DV_DISK)
2681 continue;
2682
2683 /* we don't care about floppies... */
2684 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2685 continue;
2686 }
2687
2688 /* need to find the device_name_to_block_device_major stuff */
2689 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2690 dtobdm = dev_name2blk;
2691 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2692 dtobdm++;
2693 }
2694
2695 /* get a vnode for the raw partition of this disk */
2696
2697 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2698 if (bdevvp(dev, &vp))
2699 panic("RAID can't alloc vnode");
2700
2701 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2702
2703 if (error) {
2704 /* "Who cares." Continue looking
2705 for something that exists. */
2706 vput(vp);
2707 continue;
2708 }
2709
2710 /* Ok, the disk exists. Go get the disklabel. */
2711 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2712 FREAD, NOCRED, 0);
2713 if (error) {
2714 /*
2715 * XXX can't happen - open() would
2716 * have errored out (or faked up one)
2717 */
2718 printf("can't get label for dev %s%c (%d)!?!?\n",
2719 dv->dv_xname, 'a' + RAW_PART, error);
2720 }
2721
2722 /* don't need this any more. We'll allocate it again
2723 a little later if we really do... */
2724 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2725 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2726 vput(vp);
2727
2728 for (i=0; i < label.d_npartitions; i++) {
2729 /* We only support partitions marked as RAID */
2730 if (label.d_partitions[i].p_fstype != FS_RAID)
2731 continue;
2732
2733 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2734 if (bdevvp(dev, &vp))
2735 panic("RAID can't alloc vnode");
2736
2737 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2738 if (error) {
2739 /* Whatever... */
2740 vput(vp);
2741 continue;
2742 }
2743
2744 good_one = 0;
2745
2746 clabel = (RF_ComponentLabel_t *)
2747 malloc(sizeof(RF_ComponentLabel_t),
2748 M_RAIDFRAME, M_NOWAIT);
2749 if (clabel == NULL) {
2750 /* XXX CLEANUP HERE */
2751 printf("RAID auto config: out of memory!\n");
2752 return(NULL); /* XXX probably should panic? */
2753 }
2754
2755 if (!raidread_component_label(dev, vp, clabel)) {
2756 /* Got the label. Does it look reasonable? */
2757 if (rf_reasonable_label(clabel) &&
2758 (clabel->partitionSize <=
2759 label.d_partitions[i].p_size)) {
2760 #if DEBUG
2761 printf("Component on: %s%c: %d\n",
2762 dv->dv_xname, 'a'+i,
2763 label.d_partitions[i].p_size);
2764 rf_print_component_label(clabel);
2765 #endif
2766 /* if it's reasonable, add it,
2767 else ignore it. */
2768 ac = (RF_AutoConfig_t *)
2769 malloc(sizeof(RF_AutoConfig_t),
2770 M_RAIDFRAME,
2771 M_NOWAIT);
2772 if (ac == NULL) {
2773 /* XXX should panic?? */
2774 return(NULL);
2775 }
2776
2777 sprintf(ac->devname, "%s%c",
2778 dv->dv_xname, 'a'+i);
2779 ac->dev = dev;
2780 ac->vp = vp;
2781 ac->clabel = clabel;
2782 ac->next = ac_list;
2783 ac_list = ac;
2784 good_one = 1;
2785 }
2786 }
2787 if (!good_one) {
2788 /* cleanup */
2789 free(clabel, M_RAIDFRAME);
2790 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2791 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2792 vput(vp);
2793 }
2794 }
2795 }
2796 return(ac_list);
2797 }
2798
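/*
 * Perform basic sanity checks on a component label: known version,
 * sensible clean flag, row/column within bounds, and non-zero sizes.
 */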
2799 static int
2800 rf_reasonable_label(clabel)
2801 RF_ComponentLabel_t *clabel;
2802 {
2803
2804 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2805 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2806 ((clabel->clean == RF_RAID_CLEAN) ||
2807 (clabel->clean == RF_RAID_DIRTY)) &&
2808 clabel->row >=0 &&
2809 clabel->column >= 0 &&
2810 clabel->num_rows > 0 &&
2811 clabel->num_columns > 0 &&
2812 clabel->row < clabel->num_rows &&
2813 clabel->column < clabel->num_columns &&
2814 clabel->blockSize > 0 &&
2815 clabel->numBlocks > 0) {
2816 /* label looks reasonable enough... */
2817 return(1);
2818 }
2819 return(0);
2820 }
2821
2822
2823 void
2824 rf_print_component_label(clabel)
2825 RF_ComponentLabel_t *clabel;
2826 {
2827 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2828 clabel->row, clabel->column,
2829 clabel->num_rows, clabel->num_columns);
2830 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2831 clabel->version, clabel->serial_number,
2832 clabel->mod_counter);
2833 printf(" Clean: %s Status: %d\n",
2834 clabel->clean ? "Yes" : "No", clabel->status );
2835 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2836 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2837 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2838 (char) clabel->parityConfig, clabel->blockSize,
2839 clabel->numBlocks);
2840 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2841 printf(" Contains root partition: %s\n",
2842 clabel->root_partition ? "Yes" : "No" );
2843 printf(" Last configured as: raid%d\n", clabel->last_unit );
2844 #if 0
2845 printf(" Config order: %d\n", clabel->config_order);
2846 #endif
2847
2848 }
2849
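/*
 * Sort the discovered components into configuration sets: components
 * whose labels are mutually consistent (as decided by rf_does_it_fit())
 * end up in the same set.
 */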
2850 RF_ConfigSet_t *
2851 rf_create_auto_sets(ac_list)
2852 RF_AutoConfig_t *ac_list;
2853 {
2854 RF_AutoConfig_t *ac;
2855 RF_ConfigSet_t *config_sets;
2856 RF_ConfigSet_t *cset;
2857 RF_AutoConfig_t *ac_next;
2858
2859
2860 config_sets = NULL;
2861
2862 /* Go through the AutoConfig list, and figure out which components
2863 belong to what sets. */
2864 ac = ac_list;
2865 while(ac!=NULL) {
2866 /* we're going to putz with ac->next, so save it here
2867 for use at the end of the loop */
2868 ac_next = ac->next;
2869
2870 if (config_sets == NULL) {
2871 /* will need at least this one... */
2872 config_sets = (RF_ConfigSet_t *)
2873 malloc(sizeof(RF_ConfigSet_t),
2874 M_RAIDFRAME, M_NOWAIT);
2875 if (config_sets == NULL) {
2876 panic("rf_create_auto_sets: No memory!\n");
2877 }
2878 /* this one is easy :) */
2879 config_sets->ac = ac;
2880 config_sets->next = NULL;
2881 config_sets->rootable = 0;
2882 ac->next = NULL;
2883 } else {
2884 /* which set does this component fit into? */
2885 cset = config_sets;
2886 while(cset!=NULL) {
2887 if (rf_does_it_fit(cset, ac)) {
2888 /* looks like it matches... */
2889 ac->next = cset->ac;
2890 cset->ac = ac;
2891 break;
2892 }
2893 cset = cset->next;
2894 }
2895 if (cset==NULL) {
2896 /* didn't find a match above... new set..*/
2897 cset = (RF_ConfigSet_t *)
2898 malloc(sizeof(RF_ConfigSet_t),
2899 M_RAIDFRAME, M_NOWAIT);
2900 if (cset == NULL) {
2901 panic("rf_create_auto_sets: No memory!\n");
2902 }
2903 cset->ac = ac;
2904 ac->next = NULL;
2905 cset->next = config_sets;
2906 cset->rootable = 0;
2907 config_sets = cset;
2908 }
2909 }
2910 ac = ac_next;
2911 }
2912
2913
2914 return(config_sets);
2915 }
2916
2917 static int
2918 rf_does_it_fit(cset, ac)
2919 RF_ConfigSet_t *cset;
2920 RF_AutoConfig_t *ac;
2921 {
2922 RF_ComponentLabel_t *clabel1, *clabel2;
2923
2924 /* If this one matches the *first* one in the set, that's good
2925 enough, since the other members of the set would have been
2926 through here too... */
2927 /* note that we are not checking partitionSize here..
2928
2929 Note that we are also not checking the mod_counters here.
2930 If everything else matches except the mod_counter, that's
2931 good enough for this test. We will deal with the mod_counters
2932 a little later in the autoconfiguration process.
2933
2934 (clabel1->mod_counter == clabel2->mod_counter) &&
2935
2936 The reason we don't check for this is that failed disks
2937 will have lower modification counts. If those disks are
2938 not added to the set they used to belong to, then they will
2939 form their own set, which may result in 2 different sets,
2940 for example, competing to be configured at raid0, and
2941 perhaps competing to be the root filesystem set. If the
2942 wrong ones get configured, or both attempt to become /,
2943 weird behaviour and/or serious lossage will occur. Thus we
2944 need to bring them into the fold here, and kick them out at
2945 a later point.
2946
2947 */
2948
2949 clabel1 = cset->ac->clabel;
2950 clabel2 = ac->clabel;
2951 if ((clabel1->version == clabel2->version) &&
2952 (clabel1->serial_number == clabel2->serial_number) &&
2953 (clabel1->num_rows == clabel2->num_rows) &&
2954 (clabel1->num_columns == clabel2->num_columns) &&
2955 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2956 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2957 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2958 (clabel1->parityConfig == clabel2->parityConfig) &&
2959 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2960 (clabel1->blockSize == clabel2->blockSize) &&
2961 (clabel1->numBlocks == clabel2->numBlocks) &&
2962 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2963 (clabel1->root_partition == clabel2->root_partition) &&
2964 (clabel1->last_unit == clabel2->last_unit) &&
2965 (clabel1->config_order == clabel2->config_order)) {
2966 /* if it gets here, it almost *has* to be a match */
2967 } else {
2968 /* it's not consistent with somebody in the set..
2969 punt */
2970 return(0);
2971 }
2972 /* all was fine.. it must fit... */
2973 return(1);
2974 }
2975
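/*
 * Check whether a configuration set still has enough working components
 * (at the most recent modification counter) to be configured, taking the
 * RAID level into account.
 */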
2976 int
2977 rf_have_enough_components(cset)
2978 RF_ConfigSet_t *cset;
2979 {
2980 RF_AutoConfig_t *ac;
2981 RF_AutoConfig_t *auto_config;
2982 RF_ComponentLabel_t *clabel;
2983 int r,c;
2984 int num_rows;
2985 int num_cols;
2986 int num_missing;
2987 int mod_counter;
2988 int mod_counter_found;
2989 int even_pair_failed;
2990 char parity_type;
2991
2992
2993 /* check to see that we have enough 'live' components
2994 of this set. If so, we can configure it if necessary */
2995
2996 num_rows = cset->ac->clabel->num_rows;
2997 num_cols = cset->ac->clabel->num_columns;
2998 parity_type = cset->ac->clabel->parityConfig;
2999
3000 /* XXX Check for duplicate components!?!?!? */
3001
3002 /* Determine what the mod_counter is supposed to be for this set. */
3003
3004 mod_counter_found = 0;
3005 mod_counter = 0;
3006 ac = cset->ac;
3007 while(ac!=NULL) {
3008 if (mod_counter_found==0) {
3009 mod_counter = ac->clabel->mod_counter;
3010 mod_counter_found = 1;
3011 } else {
3012 if (ac->clabel->mod_counter > mod_counter) {
3013 mod_counter = ac->clabel->mod_counter;
3014 }
3015 }
3016 ac = ac->next;
3017 }
3018
3019 num_missing = 0;
3020 auto_config = cset->ac;
3021
3022 for(r=0; r<num_rows; r++) {
3023 even_pair_failed = 0;
3024 for(c=0; c<num_cols; c++) {
3025 ac = auto_config;
3026 while(ac!=NULL) {
3027 if ((ac->clabel->row == r) &&
3028 (ac->clabel->column == c) &&
3029 (ac->clabel->mod_counter == mod_counter)) {
3030 /* it's this one... */
3031 #if DEBUG
3032 printf("Found: %s at %d,%d\n",
3033 ac->devname,r,c);
3034 #endif
3035 break;
3036 }
3037 ac=ac->next;
3038 }
3039 if (ac==NULL) {
3040 /* Didn't find one here! */
3041 /* special case for RAID 1, especially
3042 where there are more than 2
3043 components (where RAIDframe treats
3044 things a little differently :( ) */
3045 if (parity_type == '1') {
3046 if (c%2 == 0) { /* even component */
3047 even_pair_failed = 1;
3048 } else { /* odd component. If
3049 we're failed, and
3050 so is the even
3051 component, it's
3052 "Good Night, Charlie" */
3053 if (even_pair_failed == 1) {
3054 return(0);
3055 }
3056 }
3057 } else {
3058 /* normal accounting */
3059 num_missing++;
3060 }
3061 }
3062 if ((parity_type == '1') && (c%2 == 1)) {
3063 /* Just finished the odd half of a component
3064 pair without bailing.. reset the even_pair_failed
3065 flag, and go on to the next pair.... */
3066 even_pair_failed = 0;
3067 }
3068 }
3069 }
3070
3071 clabel = cset->ac->clabel;
3072
3073 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3074 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3075 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3076 /* XXX this needs to be made *much* more general */
3077 /* Too many failures */
3078 return(0);
3079 }
3080 /* otherwise, all is well, and we've got enough to take a kick
3081 at autoconfiguring this set */
3082 return(1);
3083 }
3084
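/*
 * Build an RF_Config_t from the component labels in an auto-config list,
 * suitable for handing to rf_Configure().
 */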
3085 void
3086 rf_create_configuration(ac,config,raidPtr)
3087 RF_AutoConfig_t *ac;
3088 RF_Config_t *config;
3089 RF_Raid_t *raidPtr;
3090 {
3091 RF_ComponentLabel_t *clabel;
3092 int i;
3093
3094 clabel = ac->clabel;
3095
3096 /* 1. Fill in the common stuff */
3097 config->numRow = clabel->num_rows;
3098 config->numCol = clabel->num_columns;
3099 config->numSpare = 0; /* XXX should this be set here? */
3100 config->sectPerSU = clabel->sectPerSU;
3101 config->SUsPerPU = clabel->SUsPerPU;
3102 config->SUsPerRU = clabel->SUsPerRU;
3103 config->parityConfig = clabel->parityConfig;
3104 /* XXX... */
3105 strcpy(config->diskQueueType,"fifo");
3106 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3107 config->layoutSpecificSize = 0; /* XXX ?? */
3108
3109 while(ac!=NULL) {
3110 /* row/col values will be in range due to the checks
3111 in rf_reasonable_label() */
3112 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3113 ac->devname);
3114 ac = ac->next;
3115 }
3116
3117 for(i=0;i<RF_MAXDBGV;i++) {
3118 config->debugVars[i][0] = '\0';
3119 }
3120 }
3121
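/*
 * Set the "autoconfigure" flag on the set and propagate it to the
 * component labels of all optimal components. Returns the new value.
 */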
3122 int
3123 rf_set_autoconfig(raidPtr, new_value)
3124 RF_Raid_t *raidPtr;
3125 int new_value;
3126 {
3127 RF_ComponentLabel_t clabel;
3128 struct vnode *vp;
3129 dev_t dev;
3130 int row, column;
3131
3132 raidPtr->autoconfigure = new_value;
3133 for(row=0; row<raidPtr->numRow; row++) {
3134 for(column=0; column<raidPtr->numCol; column++) {
3135 if (raidPtr->Disks[row][column].status ==
3136 rf_ds_optimal) {
3137 dev = raidPtr->Disks[row][column].dev;
3138 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3139 raidread_component_label(dev, vp, &clabel);
3140 clabel.autoconfigure = new_value;
3141 raidwrite_component_label(dev, vp, &clabel);
3142 }
3143 }
3144 }
3145 return(new_value);
3146 }
3147
3148 int
3149 rf_set_rootpartition(raidPtr, new_value)
3150 RF_Raid_t *raidPtr;
3151 int new_value;
3152 {
3153 RF_ComponentLabel_t clabel;
3154 struct vnode *vp;
3155 dev_t dev;
3156 int row, column;
3157
3158 raidPtr->root_partition = new_value;
3159 for(row=0; row<raidPtr->numRow; row++) {
3160 for(column=0; column<raidPtr->numCol; column++) {
3161 if (raidPtr->Disks[row][column].status ==
3162 rf_ds_optimal) {
3163 dev = raidPtr->Disks[row][column].dev;
3164 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3165 raidread_component_label(dev, vp, &clabel);
3166 clabel.root_partition = new_value;
3167 raidwrite_component_label(dev, vp, &clabel);
3168 }
3169 }
3170 }
3171 return(new_value);
3172 }
3173
3174 void
3175 rf_release_all_vps(cset)
3176 RF_ConfigSet_t *cset;
3177 {
3178 RF_AutoConfig_t *ac;
3179
3180 ac = cset->ac;
3181 while(ac!=NULL) {
3182 /* Close the vp, and give it back */
3183 if (ac->vp) {
3184 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3185 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3186 vput(ac->vp);
3187 ac->vp = NULL;
3188 }
3189 ac = ac->next;
3190 }
3191 }
3192
3193
3194 void
3195 rf_cleanup_config_set(cset)
3196 RF_ConfigSet_t *cset;
3197 {
3198 RF_AutoConfig_t *ac;
3199 RF_AutoConfig_t *next_ac;
3200
3201 ac = cset->ac;
3202 while(ac!=NULL) {
3203 next_ac = ac->next;
3204 /* nuke the label */
3205 free(ac->clabel, M_RAIDFRAME);
3206 /* cleanup the config structure */
3207 free(ac, M_RAIDFRAME);
3208 /* "next.." */
3209 ac = next_ac;
3210 }
3211 /* and, finally, nuke the config set */
3212 free(cset, M_RAIDFRAME);
3213 }
3214
3215
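/*
 * Initialize a component label from the current state of the RAID set.
 * The row/column fields are filled in by the caller.
 */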
3216 void
3217 raid_init_component_label(raidPtr, clabel)
3218 RF_Raid_t *raidPtr;
3219 RF_ComponentLabel_t *clabel;
3220 {
3221 /* current version number */
3222 clabel->version = RF_COMPONENT_LABEL_VERSION;
3223 clabel->serial_number = raidPtr->serial_number;
3224 clabel->mod_counter = raidPtr->mod_counter;
3225 clabel->num_rows = raidPtr->numRow;
3226 clabel->num_columns = raidPtr->numCol;
3227 clabel->clean = RF_RAID_DIRTY; /* not clean */
3228 clabel->status = rf_ds_optimal; /* "It's good!" */
3229
3230 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3231 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3232 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3233
3234 clabel->blockSize = raidPtr->bytesPerSector;
3235 clabel->numBlocks = raidPtr->sectorsPerDisk;
3236
3237 /* XXX not portable */
3238 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3239 clabel->maxOutstanding = raidPtr->maxOutstanding;
3240 clabel->autoconfigure = raidPtr->autoconfigure;
3241 clabel->root_partition = raidPtr->root_partition;
3242 clabel->last_unit = raidPtr->raidid;
3243 clabel->config_order = raidPtr->config_order;
3244 }
3245
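/*
 * Configure a RAID set from an auto-configuration set: pick a unit
 * number (preferring the one it was last configured as), build the
 * configuration, and bring the set up. Returns 0 on success, with *unit
 * set to the raid unit used.
 */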
3246 int
3247 rf_auto_config_set(cset,unit)
3248 RF_ConfigSet_t *cset;
3249 int *unit;
3250 {
3251 RF_Raid_t *raidPtr;
3252 RF_Config_t *config;
3253 int raidID;
3254 int retcode;
3255
3256 printf("RAID autoconfigure\n");
3257
3258 retcode = 0;
3259 *unit = -1;
3260
3261 /* 1. Create a config structure */
3262
3263 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3264 M_RAIDFRAME,
3265 M_NOWAIT);
3266 if (config==NULL) {
3267 printf("Out of mem!?!?\n");
3268 /* XXX do something more intelligent here. */
3269 return(1);
3270 }
3271
3272 memset(config, 0, sizeof(RF_Config_t));
3273
3274 /* XXX raidID needs to be set correctly.. */
3275
3276 /*
3277 2. Figure out what RAID ID this one is supposed to live at
3278 See if we can get the same RAID dev that it was configured
3279 on last time..
3280 */
3281
3282 raidID = cset->ac->clabel->last_unit;
3283 if ((raidID < 0) || (raidID >= numraid)) {
3284 /* let's not wander off into lala land. */
3285 raidID = numraid - 1;
3286 }
3287 if (raidPtrs[raidID]->valid != 0) {
3288
3289 /*
3290 Nope... Go looking for an alternative...
3291 Start high so we don't immediately use raid0 if that's
3292 not taken.
3293 */
3294
3295 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3296 if (raidPtrs[raidID]->valid == 0) {
3297 /* can use this one! */
3298 break;
3299 }
3300 }
3301 }
3302
3303 if (raidID < 0) {
3304 /* punt... */
3305 printf("Unable to auto configure this set!\n");
3306 printf("(Out of RAID devs!)\n");
3307 return(1);
3308 }
3309 printf("Configuring raid%d:\n",raidID);
3310 raidPtr = raidPtrs[raidID];
3311
3312 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3313 raidPtr->raidid = raidID;
3314 raidPtr->openings = RAIDOUTSTANDING;
3315
3316 /* 3. Build the configuration structure */
3317 rf_create_configuration(cset->ac, config, raidPtr);
3318
3319 /* 4. Do the configuration */
3320 retcode = rf_Configure(raidPtr, config, cset->ac);
3321
3322 if (retcode == 0) {
3323
3324 raidinit(raidPtrs[raidID]);
3325
3326 rf_markalldirty(raidPtrs[raidID]);
3327 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3328 if (cset->ac->clabel->root_partition==1) {
3329 /* everything configured just fine. Make a note
3330 that this set is eligible to be root. */
3331 cset->rootable = 1;
3332 /* XXX do this here? */
3333 raidPtrs[raidID]->root_partition = 1;
3334 }
3335 }
3336
3337 /* 5. Cleanup */
3338 free(config, M_RAIDFRAME);
3339
3340 *unit = raidID;
3341 return(retcode);
3342 }
3343
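/*
 * Update the disk statistics to account for the completion of the I/O
 * described by desc.
 */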
3344 void
3345 rf_disk_unbusy(desc)
3346 RF_RaidAccessDesc_t *desc;
3347 {
3348 struct buf *bp;
3349
3350 bp = (struct buf *)desc->bp;
3351 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3352 (bp->b_bcount - bp->b_resid));
3353 }
3354