/*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.5 2001/11/18 13:22:09 scw Exp $	*/
/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */




/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.5 2001/11/18 13:22:09 scw Exp $");

#include <sys/errno.h>
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/types.h>
#include <machine/types.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <sys/reboot.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include "raid.h"
#include "opt_raid_autoconfig.h"
#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugMem.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_debugprint.h"
#include "rf_threadstuff.h"

int	rf_kdebug_level = 0;

#ifdef DEBUG
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */
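
/*
 * Usage sketch for db1_printf (illustrative only; the unit/partition values
 * are made up).  The double parentheses are required so that the entire
 * argument list collapses into the single macro parameter "a":
 *
 *	db1_printf(("raid%d: opening partition %d\n", unit, part));
 */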

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

void raidattach(int);
int raidsize(dev_t);
int raidopen(dev_t, int, int, struct proc *);
int raidclose(dev_t, int, int, struct proc *);
int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
int raidwrite(dev_t, struct uio *, int);
int raidread(dev_t, struct uio *, int);
void raidstrategy(struct buf *);
int raiddump(dev_t, daddr_t, caddr_t, size_t);

/*
 * Pilfered from ccd.c
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int	rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
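
/*
 * Why rf_buf "MUST BE FIRST": component I/O is issued on &raidbp->rf_buf,
 * so the completion callback can recover the enclosing raidbuf from the
 * struct buf pointer with a plain cast (this is what KernelWakeupFunc does
 * below).  A minimal sketch of the pattern, for illustration only:
 */
#if 0
static struct raidbuf *
raidbuf_from_buf(struct buf *vbp)
{
	/* valid only because rf_buf is the first member of struct raidbuf */
	return ((struct raidbuf *) vbp);
}
#endif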

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

struct raid_softc {
	int	sc_flags;	/* flags */
	int	sc_cflags;	/* configuration flags */
	size_t	sc_size;	/* size of the raid device */
	char	sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif
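
/*
 * The arithmetic above, written out as a sketch (illustrative only; the
 * driver does not use this helper, and the names are made up):
 */
#if 0
static size_t
rf_worstcase_write_overhead(size_t write_bytes, int outstanding)
{
	/* old data + old parity + new parity held per request in flight */
	size_t per_req = 3 * write_bytes;

	/* e.g. 3 * 64K = 192K per request; times 10 outstanding requests
	 * is 1920K, on top of the incoming data itself */
	return (per_req * (size_t) outstanding);
}
#endif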

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
void rf_mountroot_hook(struct device *);

struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr);  */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file.  */
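
/*
 * For reference (assumed usage, not verified against every kernel version):
 * autoconfiguration is normally switched on by building the kernel with the
 * RAID_AUTOCONFIG option, e.g. a kernel config line of the form
 *
 *	options 	RAID_AUTOCONFIG
 *
 * which causes opt_raid_autoconfig.h to define RAID_AUTOCONFIG and flips
 * raidautoconfig to 1 in raidattach() below.
 */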

void
raidattach(num)
	int	num;
{
	int raidID;
	int i, rc;
	RF_AutoConfig_t *ac_list; /* autoconfig list */
	RF_ConfigSet_t *config_sets;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!\n");
	}

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!\n");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!\n");
	}

	for (raidID = 0; raidID < num; raidID++) {
		BUFQ_INIT(&raid_softc[raidID].buf_queue);

		raidrootdev[raidID].dv_class  = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit   = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags  = 0;
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#if RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	if (raidautoconfig) {
		/* 1. locate all RAID components on the system */

#if DEBUG
		printf("Searching for raid components...\n");
#endif
		ac_list = rf_find_raid_components();

		/* 2. sort them into their respective sets */

		config_sets = rf_create_auto_sets(ac_list);

		/* 3. evaluate each set and configure the valid ones
		   This gets done in rf_buildroothack() */

		/* schedule the creation of the thread to do the
		   "/ on RAID" stuff */

		kthread_create(rf_buildroothack,config_sets);

#if 0
		mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
#endif
	}

}

void
rf_buildroothack(arg)
	void *arg;
{
	RF_ConfigSet_t *config_sets = arg;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int num_root;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#if DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	if (boothowto & RB_ASKNAME) {
		/* We don't auto-config... */
	} else {
		/* They didn't ask, and we found something bootable... */

		if (num_root == 1) {
			booted_device = &raidrootdev[rootID];
		} else if (num_root > 1) {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}


int
raidsize(dev)
	dev_t	dev;
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int	part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc->l_proc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc->l_proc))
		return (-1);

	return (size);

}

int
raiddump(dev, blkno, va, size)
	dev_t	dev;
	daddr_t blkno;
	caddr_t va;
	size_t	size;
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
	dev_t	dev;
	int	flags, fmt;
	struct proc *p;
{
	int	unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int	part, pmask;
	int	error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
int
raidclose(dev, flags, fmt, p)
	dev_t	dev;
	int	flags, fmt;
	struct proc *p;
{
	int	unit = raidunit(dev);
	struct raid_softc *rs;
	int	error = 0;
	int	part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
#if 0
		printf("Last one on raid%d.  Updating status.\n",unit);
#endif
		rf_update_component_labels(raidPtrs[unit],
					   RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);
			pool_destroy(&rs->sc_cbufpool);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}

void
raidstrategy(bp)
	struct buf *bp;
{
	int	s;

	unsigned int raidID = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	struct raid_softc *rs = &raid_softc[raidID];
	struct disklabel *lp;
	int	wlabel;

	if ((rs->sc_flags & RAIDF_INITED) ==0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	raidPtr = raidPtrs[raidID];
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		biodone(bp);
		return;
	}
	lp = rs->sc_dkdev.dk_label;

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			biodone(bp);
			return;
		}
	s = splbio();

	bp->b_resid = 0;

	/* stuff it onto our queue */
	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);

	raidstart(raidPtrs[raidID]);

	splx(s);
}
/* ARGSUSED */
int
raidread(dev, uio, flags)
	dev_t	dev;
	struct uio *uio;
	int	flags;
{
	int	unit = raidunit(dev);
	struct raid_softc *rs;
	int	part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	part = DISKPART(dev);

	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
int
raidwrite(dev, uio, flags)
	dev_t	dev;
	struct uio *uio;
	int	flags;
{
	int	unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	db1_printf(("raidwrite\n"));
	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

int
raidioctl(dev, cmd, data, flag, p)
	dev_t	dev;
	u_long	cmd;
	caddr_t data;
	int	flag;
	struct proc *p;
{
	int	unit = raidunit(dev);
	int	error = 0;
	int	part, pmask;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int row;
	int column;
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t hot_spare;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
		(int) DISKPART(dev), (int) unit, (int) cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {

		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
		    sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific,
			    (caddr_t) specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * If somebody has a partition mounted, we shouldn't
		 * shutdown.
		 */

		part = DISKPART(dev);
		pmask = (1 << part);
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask))) {
			raidunlock(rs);
			return (EBUSY);
		}

		retcode = rf_Shutdown(raidPtr);

		pool_destroy(&rs->sc_cbufpool);

		/* It's no longer initialized... */
		rs->sc_flags &= ~RAIDF_INITED;

		/* Detach the disk. */
		disk_detach(&rs->sc_dkdev);

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
			   (RF_ComponentLabel_t *));
		if (clabel == NULL)
			return (ENOMEM);

		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));

		retcode = copyin( *clabel_ptr, clabel,
				  sizeof(RF_ComponentLabel_t));

		if (retcode) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(retcode);
		}

		row = clabel->row;
		column = clabel->column;

		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol +
				     raidPtr->numSpare)) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(EINVAL);
		}

		raidread_component_label(raidPtr->Disks[row][column].dev,
				raidPtr->raid_cinfo[row][column].ci_vp,
				clabel );

		retcode = copyout((caddr_t) clabel,
				  (caddr_t) *clabel_ptr,
				  sizeof(RF_ComponentLabel_t));
		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
		return (retcode);

	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		printf("Got component label:\n");
		printf("Version: %d\n",clabel->version);
		printf("Serial Number: %d\n",clabel->serial_number);
		printf("Mod counter: %d\n",clabel->mod_counter);
		printf("Row: %d\n", clabel->row);
		printf("Column: %d\n", clabel->column);
		printf("Num Rows: %d\n", clabel->num_rows);
		printf("Num Columns: %d\n", clabel->num_columns);
		printf("Clean: %d\n", clabel->clean);
		printf("Status: %d\n", clabel->status);

		row = clabel->row;
		column = clabel->column;

		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
#if 0
		raidwrite_component_label(
			    raidPtr->Disks[row][column].dev,
			    raidPtr->raid_cinfo[row][column].ci_vp,
			    clabel );
#endif
		return (0);

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		raid_init_component_label(raidPtr, &ci_label);
		ci_label.serial_number = clabel->serial_number;

		for(row=0;row<raidPtr->numRow;row++) {
			ci_label.row = row;
			for(column=0;column<raidPtr->numCol;column++) {
				diskPtr = &raidPtr->Disks[row][column];
				if (!RF_DEAD_DISK(diskPtr->status)) {
					ci_label.partitionSize = diskPtr->partitionSize;
					ci_label.column = column;
					raidwrite_component_label(
					    raidPtr->Disks[row][column].dev,
					    raidPtr->raid_cinfo[row][column].ci_vp,
					    &ci_label );
				}
			}
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("New autoconfig value is: %d\n", d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("New rootpartition value is: %d\n", d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		row = component.row;
		column = component.column;
		printf("Rebuild: %d %d\n",row, column);
		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->row = row;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
		d_cfg->rows = raidPtr->numRow;
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (i = 0; i < d_cfg->rows; i++) {
			for (j = 0; j < d_cfg->cols; j++) {
				d_cfg->devs[d] = raidPtr->Disks[i][j];
				d++;
			}
		}
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[0][j];
		}
		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
				  sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;

		if (rr->row < 0 || rr->row >= raidPtr->numRow
		    || rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		printf("raid%d: Failing the disk: row: %d col: %d\n",
		       unit, rr->row, rr->col);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		bcopy(rr, rrcopy, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		row = 0; /* XXX we only consider a single row... */
		if (raidPtr->status[row] != rf_rs_reconstructing)
			*(int *) data = 100;
		else
			*(int *) data = raidPtr->reconControl[row]->percentComplete;
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		row = 0; /* XXX we only consider a single row... */
		if (raidPtr->status[row] != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl[row]->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl[row]->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout((caddr_t) &progressInfo,
				  (caddr_t) *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout((caddr_t) &progressInfo,
				  (caddr_t) *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout((caddr_t) &progressInfo,
				  (caddr_t) *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(raidPtr)
	RF_Raid_t *raidPtr;
{
	struct raid_softc *rs;
	int	unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

}

/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int	retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int	retcode;
	struct partition *pp;
	daddr_t blocknum;
	int	unit;
	struct raid_softc *rs;
	int	do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		raidPtr->numNewFailures--;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int	op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int	unit;
	int	s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
1878 static void
1879 KernelWakeupFunc(vbp)
1880 struct buf *vbp;
1881 {
1882 RF_DiskQueueData_t *req = NULL;
1883 RF_DiskQueue_t *queue;
1884 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1885 struct buf *bp;
1886 struct raid_softc *rs;
1887 int unit;
1888 int s;
1889
1890 s = splbio();
1891 db1_printf(("recovering the request queue:\n"));
1892 req = raidbp->req;
1893
1894 bp = raidbp->rf_obp;
1895
1896 queue = (RF_DiskQueue_t *) req->queue;
1897
1898 if (raidbp->rf_buf.b_flags & B_ERROR) {
1899 bp->b_flags |= B_ERROR;
1900 bp->b_error = raidbp->rf_buf.b_error ?
1901 raidbp->rf_buf.b_error : EIO;
1902 }
1903
1904 /* XXX methinks this could be wrong... */
1905 #if 1
1906 bp->b_resid = raidbp->rf_buf.b_resid;
1907 #endif
1908
1909 if (req->tracerec) {
1910 RF_ETIMER_STOP(req->tracerec->timer);
1911 RF_ETIMER_EVAL(req->tracerec->timer);
1912 RF_LOCK_MUTEX(rf_tracing_mutex);
1913 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1914 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1915 req->tracerec->num_phys_ios++;
1916 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1917 }
1918 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1919
1920 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1921
1922
1923 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1924 * ballistic, and mark the component as hosed... */
1925
1926 if (bp->b_flags & B_ERROR) {
1927 /* Mark the disk as dead */
1928 /* but only mark it once... */
1929 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1930 rf_ds_optimal) {
1931 printf("raid%d: IO Error. Marking %s as failed.\n",
1932 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1933 queue->raidPtr->Disks[queue->row][queue->col].status =
1934 rf_ds_failed;
1935 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1936 queue->raidPtr->numFailures++;
1937 queue->raidPtr->numNewFailures++;
1938 } else { /* Disk is already dead... */
1939 /* printf("Disk already marked as dead!\n"); */
1940 }
1941
1942 }
1943
1944 rs = &raid_softc[unit];
1945 RAIDPUTBUF(rs, raidbp);
1946
1947 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1948 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1949
1950 splx(s);
1951 }
1952
1953
1954
1955 /*
1956 * initialize a buf structure for doing an I/O in the kernel.
1957 */
1958 static void
1959 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1960 logBytesPerSector, b_proc)
1961 struct buf *bp;
1962 struct vnode *b_vp;
1963 unsigned rw_flag;
1964 dev_t dev;
1965 RF_SectorNum_t startSect;
1966 RF_SectorCount_t numSect;
1967 caddr_t buf;
1968 void (*cbFunc) (struct buf *);
1969 void *cbArg;
1970 int logBytesPerSector;
1971 struct proc *b_proc;
1972 {
1973 /* bp->b_flags = B_PHYS | rw_flag; */
1974 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1975 bp->b_bcount = numSect << logBytesPerSector;
1976 bp->b_bufsize = bp->b_bcount;
1977 bp->b_error = 0;
1978 bp->b_dev = dev;
1979 bp->b_data = buf;
1980 bp->b_blkno = startSect;
1981 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1982 if (bp->b_bcount == 0) {
1983 panic("bp->b_bcount is zero in InitBP!!\n");
1984 }
1985 bp->b_proc = b_proc;
1986 bp->b_iodone = cbFunc;
1987 bp->b_vp = b_vp;
1988
1989 }
1990
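/*
 * Build a default disklabel for the raid device from the geometry
 * recorded in the RF_Raid_t structure.
 */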
1991 static void
1992 raidgetdefaultlabel(raidPtr, rs, lp)
1993 RF_Raid_t *raidPtr;
1994 struct raid_softc *rs;
1995 struct disklabel *lp;
1996 {
1997 db1_printf(("Building a default label...\n"));
1998 memset(lp, 0, sizeof(*lp));
1999
2000 /* fabricate a label... */
2001 lp->d_secperunit = raidPtr->totalSectors;
2002 lp->d_secsize = raidPtr->bytesPerSector;
2003 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2004 lp->d_ntracks = 4 * raidPtr->numCol;
2005 lp->d_ncylinders = raidPtr->totalSectors /
2006 (lp->d_nsectors * lp->d_ntracks);
2007 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2008
2009 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2010 lp->d_type = DTYPE_RAID;
2011 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2012 lp->d_rpm = 3600;
2013 lp->d_interleave = 1;
2014 lp->d_flags = 0;
2015
2016 lp->d_partitions[RAW_PART].p_offset = 0;
2017 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2018 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2019 lp->d_npartitions = RAW_PART + 1;
2020
2021 lp->d_magic = DISKMAGIC;
2022 lp->d_magic2 = DISKMAGIC;
2023 lp->d_checksum = dkcksum(lp);
2024
2025 }
2026 /*
2027 * Read the disklabel from the raid device. If one is not present, fake one
2028 * up.
2029 */
2030 static void
2031 raidgetdisklabel(dev)
2032 dev_t dev;
2033 {
2034 int unit = raidunit(dev);
2035 struct raid_softc *rs = &raid_softc[unit];
2036 char *errstring;
2037 struct disklabel *lp = rs->sc_dkdev.dk_label;
2038 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2039 RF_Raid_t *raidPtr;
2040
2041 db1_printf(("Getting the disklabel...\n"));
2042
2043 memset(clp, 0, sizeof(*clp));
2044
2045 raidPtr = raidPtrs[unit];
2046
2047 raidgetdefaultlabel(raidPtr, rs, lp);
2048
2049 /*
2050 * Call the generic disklabel extraction routine.
2051 */
2052 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2053 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2054 if (errstring)
2055 raidmakedisklabel(rs);
2056 else {
2057 int i;
2058 struct partition *pp;
2059
2060 /*
2061 * Sanity check whether the found disklabel is valid.
2062 *
2063 * This is necessary since the total size of the raid device
2064 * may vary when an interleave is changed even though exactly
2065 * the same components are used, and an old disklabel may be
2066 * used if one is found.
2067 */
2068 if (lp->d_secperunit != rs->sc_size)
2069 printf("WARNING: %s: "
2070 "total sector size in disklabel (%d) != "
2071 "the size of raid (%ld)\n", rs->sc_xname,
2072 lp->d_secperunit, (long) rs->sc_size);
2073 for (i = 0; i < lp->d_npartitions; i++) {
2074 pp = &lp->d_partitions[i];
2075 if (pp->p_offset + pp->p_size > rs->sc_size)
2076 printf("WARNING: %s: end of partition `%c' "
2077 "exceeds the size of raid (%ld)\n",
2078 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2079 }
2080 }
2081
2082 }
2083 /*
2084 * Take care of things one might want to take care of in the event
2085 * that a disklabel isn't present.
2086 */
2087 static void
2088 raidmakedisklabel(rs)
2089 struct raid_softc *rs;
2090 {
2091 struct disklabel *lp = rs->sc_dkdev.dk_label;
2092 db1_printf(("Making a label..\n"));
2093
2094 /*
2095 * For historical reasons, if there's no disklabel present
2096 * the raw partition must be marked FS_BSDFFS.
2097 */
2098
2099 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2100
2101 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2102
2103 lp->d_checksum = dkcksum(lp);
2104 }
2105 /*
2106 * Lookup the provided name in the filesystem. If the file exists,
2107 * is a valid block device, and isn't being used by anyone else,
2108 * set *vpp to the file's vnode.
2109 * You'll find the original of this in ccd.c
2110 */
2111 int
2112 raidlookup(path, p, vpp)
2113 char *path;
2114 struct proc *p;
2115 struct vnode **vpp; /* result */
2116 {
2117 struct nameidata nd;
2118 struct vnode *vp;
2119 struct vattr va;
2120 int error;
2121
2122 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2123 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2124 #ifdef DEBUG
2125 printf("RAIDframe: vn_open returned %d\n", error);
2126 #endif
2127 return (error);
2128 }
2129 vp = nd.ni_vp;
2130 if (vp->v_usecount > 1) {
2131 VOP_UNLOCK(vp, 0);
2132 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2133 return (EBUSY);
2134 }
2135 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2136 VOP_UNLOCK(vp, 0);
2137 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2138 return (error);
2139 }
2140 /* XXX: eventually we should handle VREG, too. */
2141 if (va.va_type != VBLK) {
2142 VOP_UNLOCK(vp, 0);
2143 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2144 return (ENOTBLK);
2145 }
2146 VOP_UNLOCK(vp, 0);
2147 *vpp = vp;
2148 return (0);
2149 }
2150 /*
2151 * Wait interruptibly for an exclusive lock.
2152 *
2153 * XXX
2154 * Several drivers do this; it should be abstracted and made MP-safe.
2155 * (Hmm... where have we seen this warning before :-> GO )
2156 */
2157 static int
2158 raidlock(rs)
2159 struct raid_softc *rs;
2160 {
2161 int error;
2162
2163 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2164 rs->sc_flags |= RAIDF_WANTED;
2165 if ((error =
2166 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2167 return (error);
2168 }
2169 rs->sc_flags |= RAIDF_LOCKED;
2170 return (0);
2171 }
2172 /*
2173 * Unlock and wake up any waiters.
2174 */
2175 static void
2176 raidunlock(rs)
2177 struct raid_softc *rs;
2178 {
2179
2180 rs->sc_flags &= ~RAIDF_LOCKED;
2181 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2182 rs->sc_flags &= ~RAIDF_WANTED;
2183 wakeup(rs);
2184 }
2185 }
2186
2187
2188 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2189 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2190
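/*
 * Read a component's label, set the supplied modification counter,
 * mark the label clean, and write it back.
 */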
2191 int
2192 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2193 {
2194 RF_ComponentLabel_t clabel;
2195 raidread_component_label(dev, b_vp, &clabel);
2196 clabel.mod_counter = mod_counter;
2197 clabel.clean = RF_RAID_CLEAN;
2198 raidwrite_component_label(dev, b_vp, &clabel);
2199 return(0);
2200 }
2201
2202
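/*
 * Read a component's label, set the supplied modification counter,
 * mark the label dirty, and write it back.
 */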
2203 int
2204 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2205 {
2206 RF_ComponentLabel_t clabel;
2207 raidread_component_label(dev, b_vp, &clabel);
2208 clabel.mod_counter = mod_counter;
2209 clabel.clean = RF_RAID_DIRTY;
2210 raidwrite_component_label(dev, b_vp, &clabel);
2211 return(0);
2212 }
2213
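/*
 * Read the RAIDframe component label from the reserved area
 * (RF_COMPONENT_INFO_OFFSET) near the beginning of the component.
 */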
2214 /* ARGSUSED */
2215 int
2216 raidread_component_label(dev, b_vp, clabel)
2217 dev_t dev;
2218 struct vnode *b_vp;
2219 RF_ComponentLabel_t *clabel;
2220 {
2221 struct buf *bp;
2222 int error;
2223
2224 /* XXX should probably ensure that we don't try to do this if
2225 someone has changed rf_protected_sectors. */
2226
2227 if (b_vp == NULL) {
2228 /* For whatever reason, this component is not valid.
2229 Don't try to read a component label from it. */
2230 return(EINVAL);
2231 }
2232
2233 /* get a block of the appropriate size... */
2234 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2235 bp->b_dev = dev;
2236
2237 /* get our ducks in a row for the read */
2238 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2239 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2240 bp->b_flags |= B_READ;
2241 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2242
2243 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2244
2245 error = biowait(bp);
2246
2247 if (!error) {
2248 memcpy(clabel, bp->b_data,
2249 sizeof(RF_ComponentLabel_t));
2250 #if 0
2251 rf_print_component_label( clabel );
2252 #endif
2253 } else {
2254 #if 0
2255 printf("Failed to read RAID component label!\n");
2256 #endif
2257 }
2258
2259 brelse(bp);
2260 return(error);
2261 }
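/*
 * Write a RAIDframe component label into the reserved area near the
 * beginning of the component.
 */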
2262 /* ARGSUSED */
2263 int
2264 raidwrite_component_label(dev, b_vp, clabel)
2265 dev_t dev;
2266 struct vnode *b_vp;
2267 RF_ComponentLabel_t *clabel;
2268 {
2269 struct buf *bp;
2270 int error;
2271
2272 /* get a block of the appropriate size... */
2273 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2274 bp->b_dev = dev;
2275
2276 /* get our ducks in a row for the write */
2277 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2278 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2279 bp->b_flags |= B_WRITE;
2280 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2281
2282 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2283
2284 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2285
2286 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2287 error = biowait(bp);
2288 brelse(bp);
2289 if (error) {
2290 #if 1
2291 printf("Failed to write RAID component info!\n");
2292 #endif
2293 }
2294
2295 return(error);
2296 }
2297
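/*
 * Bump the modification counter and mark the component label of every
 * non-failed, non-spared component in the set as dirty.
 */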
2298 void
2299 rf_markalldirty(raidPtr)
2300 RF_Raid_t *raidPtr;
2301 {
2302 RF_ComponentLabel_t clabel;
2303 int r,c;
2304
2305 raidPtr->mod_counter++;
2306 for (r = 0; r < raidPtr->numRow; r++) {
2307 for (c = 0; c < raidPtr->numCol; c++) {
2308 /* we don't want to touch (at all) a disk that has
2309 failed */
2310 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2311 raidread_component_label(
2312 raidPtr->Disks[r][c].dev,
2313 raidPtr->raid_cinfo[r][c].ci_vp,
2314 &clabel);
2315 if (clabel.status == rf_ds_spared) {
2316 /* XXX do something special...
2317 but whatever you do, don't
2318 try to access it!! */
2319 } else {
2320 #if 0
2321 clabel.status =
2322 raidPtr->Disks[r][c].status;
2323 raidwrite_component_label(
2324 raidPtr->Disks[r][c].dev,
2325 raidPtr->raid_cinfo[r][c].ci_vp,
2326 &clabel);
2327 #endif
2328 raidmarkdirty(
2329 raidPtr->Disks[r][c].dev,
2330 raidPtr->raid_cinfo[r][c].ci_vp,
2331 raidPtr->mod_counter);
2332 }
2333 }
2334 }
2335 }
2336 /* printf("Component labels marked dirty.\n"); */
2337 #if 0
2338 for( c = 0; c < raidPtr->numSpare ; c++) {
2339 sparecol = raidPtr->numCol + c;
2340 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2341 /*
2342
2343 XXX this is where we get fancy and map this spare
2344 into its correct spot in the array.
2345
2346 */
2347 /*
2348
2349 we claim this disk is "optimal" if it's
2350 rf_ds_used_spare, as that means it should be
2351 directly substitutable for the disk it replaced.
2352 We note that too...
2353
2354 */
2355
2356 for(i=0;i<raidPtr->numRow;i++) {
2357 for(j=0;j<raidPtr->numCol;j++) {
2358 if ((raidPtr->Disks[i][j].spareRow ==
2359 r) &&
2360 (raidPtr->Disks[i][j].spareCol ==
2361 sparecol)) {
2362 srow = r;
2363 scol = sparecol;
2364 break;
2365 }
2366 }
2367 }
2368
2369 raidread_component_label(
2370 raidPtr->Disks[r][sparecol].dev,
2371 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2372 &clabel);
2373 /* make sure status is noted */
2374 clabel.version = RF_COMPONENT_LABEL_VERSION;
2375 clabel.mod_counter = raidPtr->mod_counter;
2376 clabel.serial_number = raidPtr->serial_number;
2377 clabel.row = srow;
2378 clabel.column = scol;
2379 clabel.num_rows = raidPtr->numRow;
2380 clabel.num_columns = raidPtr->numCol;
2381 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2382 clabel.status = rf_ds_optimal;
2383 raidwrite_component_label(
2384 raidPtr->Disks[r][sparecol].dev,
2385 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2386 &clabel);
2387 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2388 raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2389 }
2390 }
2391
2392 #endif
2393 }
2394
2395
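/*
 * Bump the modification counter and rewrite the component labels of
 * all optimal components and in-use spares. On a final update with
 * good parity, also mark the components clean.
 */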
2396 void
2397 rf_update_component_labels(raidPtr, final)
2398 RF_Raid_t *raidPtr;
2399 int final;
2400 {
2401 RF_ComponentLabel_t clabel;
2402 int sparecol;
2403 int r,c;
2404 int i,j;
2405 int srow, scol;
2406
2407 srow = -1;
2408 scol = -1;
2409
2410 /* XXX should do extra checks to make sure things really are clean,
2411 rather than blindly setting the clean bit... */
2412
2413 raidPtr->mod_counter++;
2414
2415 for (r = 0; r < raidPtr->numRow; r++) {
2416 for (c = 0; c < raidPtr->numCol; c++) {
2417 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2418 raidread_component_label(
2419 raidPtr->Disks[r][c].dev,
2420 raidPtr->raid_cinfo[r][c].ci_vp,
2421 &clabel);
2422 /* make sure status is noted */
2423 clabel.status = rf_ds_optimal;
2424 /* bump the counter */
2425 clabel.mod_counter = raidPtr->mod_counter;
2426
2427 raidwrite_component_label(
2428 raidPtr->Disks[r][c].dev,
2429 raidPtr->raid_cinfo[r][c].ci_vp,
2430 &clabel);
2431 if (final == RF_FINAL_COMPONENT_UPDATE) {
2432 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2433 raidmarkclean(
2434 raidPtr->Disks[r][c].dev,
2435 raidPtr->raid_cinfo[r][c].ci_vp,
2436 raidPtr->mod_counter);
2437 }
2438 }
2439 }
2440 /* else we don't touch it.. */
2441 }
2442 }
2443
2444 for( c = 0; c < raidPtr->numSpare ; c++) {
2445 sparecol = raidPtr->numCol + c;
2446 /* Need to ensure that the reconstruct actually completed! */
2447 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2448 /*
2449
2450 we claim this disk is "optimal" if it's
2451 rf_ds_used_spare, as that means it should be
2452 directly substitutable for the disk it replaced.
2453 We note that too...
2454
2455 */
2456
2457 for(i=0;i<raidPtr->numRow;i++) {
2458 for(j=0;j<raidPtr->numCol;j++) {
2459 if ((raidPtr->Disks[i][j].spareRow ==
2460 0) &&
2461 (raidPtr->Disks[i][j].spareCol ==
2462 sparecol)) {
2463 srow = i;
2464 scol = j;
2465 break;
2466 }
2467 }
2468 }
2469
2470 /* XXX shouldn't *really* need this... */
2471 raidread_component_label(
2472 raidPtr->Disks[0][sparecol].dev,
2473 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2474 &clabel);
2475 /* make sure status is noted */
2476
2477 raid_init_component_label(raidPtr, &clabel);
2478
2479 clabel.mod_counter = raidPtr->mod_counter;
2480 clabel.row = srow;
2481 clabel.column = scol;
2482 clabel.status = rf_ds_optimal;
2483
2484 raidwrite_component_label(
2485 raidPtr->Disks[0][sparecol].dev,
2486 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2487 &clabel);
2488 if (final == RF_FINAL_COMPONENT_UPDATE) {
2489 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2490 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2491 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2492 raidPtr->mod_counter);
2493 }
2494 }
2495 }
2496 }
2497 /* printf("Component labels updated\n"); */
2498 }
2499
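/*
 * Close a component's vnode. Components opened during
 * autoconfiguration (with NOCRED) are closed via VOP_CLOSE()/vput();
 * others go through vn_close() using the engine thread's credentials.
 */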
2500 void
2501 rf_close_component(raidPtr, vp, auto_configured)
2502 RF_Raid_t *raidPtr;
2503 struct vnode *vp;
2504 int auto_configured;
2505 {
2506 struct proc *p;
2507
2508 p = raidPtr->engine_thread;
2509
2510 if (vp != NULL) {
2511 if (auto_configured == 1) {
2512 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2513 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2514 vput(vp);
2515
2516 } else {
2517 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2518 }
2519 } else {
2520 printf("vnode was NULL\n");
2521 }
2522 }
2523
2524
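/*
 * Close the vnodes for all components and spares of the set and clear
 * the associated bookkeeping.
 */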
2525 void
2526 rf_UnconfigureVnodes(raidPtr)
2527 RF_Raid_t *raidPtr;
2528 {
2529 int r,c;
2530 struct proc *p;
2531 struct vnode *vp;
2532 int acd;
2533
2534
2535 /* We take this opportunity to close the vnodes like we should.. */
2536
2537 p = raidPtr->engine_thread;
2538
2539 for (r = 0; r < raidPtr->numRow; r++) {
2540 for (c = 0; c < raidPtr->numCol; c++) {
2541 printf("Closing vnode for row: %d col: %d\n", r, c);
2542 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2543 acd = raidPtr->Disks[r][c].auto_configured;
2544 rf_close_component(raidPtr, vp, acd);
2545 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2546 raidPtr->Disks[r][c].auto_configured = 0;
2547 }
2548 }
2549 for (r = 0; r < raidPtr->numSpare; r++) {
2550 printf("Closing vnode for spare: %d\n", r);
2551 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2552 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2553 rf_close_component(raidPtr, vp, acd);
2554 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2555 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2556 }
2557 }
2558
2559
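/*
 * Kernel thread body: fail the requested component via rf_FailDisk()
 * (optionally starting reconstruction), free the request, and exit.
 */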
2560 void
2561 rf_ReconThread(req)
2562 struct rf_recon_req *req;
2563 {
2564 int s;
2565 RF_Raid_t *raidPtr;
2566
2567 s = splbio();
2568 raidPtr = (RF_Raid_t *) req->raidPtr;
2569 raidPtr->recon_in_progress = 1;
2570
2571 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2572 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2573
2574 /* XXX get rid of this! we don't need it at all.. */
2575 RF_Free(req, sizeof(*req));
2576
2577 raidPtr->recon_in_progress = 0;
2578 splx(s);
2579
2580 /* That's all... */
2581 kthread_exit(0); /* does not return */
2582 }
2583
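/*
 * Kernel thread body: rewrite the array's parity via rf_RewriteParity();
 * on success mark the parity good, then wake any shutdown waiter and
 * exit.
 */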
2584 void
2585 rf_RewriteParityThread(raidPtr)
2586 RF_Raid_t *raidPtr;
2587 {
2588 int retcode;
2589 int s;
2590
2591 raidPtr->parity_rewrite_in_progress = 1;
2592 s = splbio();
2593 retcode = rf_RewriteParity(raidPtr);
2594 splx(s);
2595 if (retcode) {
2596 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2597 } else {
2598 /* set the clean bit! If we shutdown correctly,
2599 the clean bit on each component label will get
2600 set */
2601 raidPtr->parity_good = RF_RAID_CLEAN;
2602 }
2603 raidPtr->parity_rewrite_in_progress = 0;
2604
2605 /* Anyone waiting for us to stop? If so, inform them... */
2606 if (raidPtr->waitShutdown) {
2607 wakeup(&raidPtr->parity_rewrite_in_progress);
2608 }
2609
2610 /* That's all... */
2611 kthread_exit(0); /* does not return */
2612 }
2613
2614
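/*
 * Kernel thread body: copy reconstructed data back via
 * rf_CopybackReconstructedData(), then exit.
 */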
2615 void
2616 rf_CopybackThread(raidPtr)
2617 RF_Raid_t *raidPtr;
2618 {
2619 int s;
2620
2621 raidPtr->copyback_in_progress = 1;
2622 s = splbio();
2623 rf_CopybackReconstructedData(raidPtr);
2624 splx(s);
2625 raidPtr->copyback_in_progress = 0;
2626
2627 /* That's all... */
2628 kthread_exit(0); /* does not return */
2629 }
2630
2631
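/*
 * Kernel thread body: reconstruct the given component in place via
 * rf_ReconstructInPlace(), free the request, and exit.
 */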
2632 void
2633 rf_ReconstructInPlaceThread(req)
2634 struct rf_recon_req *req;
2635 {
2636 int retcode;
2637 int s;
2638 RF_Raid_t *raidPtr;
2639
2640 s = splbio();
2641 raidPtr = req->raidPtr;
2642 raidPtr->recon_in_progress = 1;
2643 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2644 RF_Free(req, sizeof(*req));
2645 raidPtr->recon_in_progress = 0;
2646 splx(s);
2647
2648 /* That's all... */
2649 kthread_exit(0); /* does not return */
2650 }
2651
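/*
 * Mount-root hook for the raid device; intentionally a no-op at
 * present.
 */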
2652 void
2653 rf_mountroot_hook(dev)
2654 struct device *dev;
2655 {
2656
2657 }
2658
2659
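/*
 * Walk every disk device in the system, read its disklabel, and for
 * each FS_RAID partition try to read a component label. Partitions
 * with reasonable labels are collected into an RF_AutoConfig_t list,
 * which is returned to the caller.
 */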
2660 RF_AutoConfig_t *
2661 rf_find_raid_components()
2662 {
2663 struct devnametobdevmaj *dtobdm;
2664 struct vnode *vp;
2665 struct disklabel label;
2666 struct device *dv;
2667 char *cd_name;
2668 dev_t dev;
2669 int error;
2670 int i;
2671 int good_one;
2672 RF_ComponentLabel_t *clabel;
2673 RF_AutoConfig_t *ac_list;
2674 RF_AutoConfig_t *ac;
2675
2676
2677 /* initialize the AutoConfig list */
2678 ac_list = NULL;
2679
2680 /* we begin by trolling through *all* the devices on the system */
2681
2682 for (dv = alldevs.tqh_first; dv != NULL;
2683 dv = dv->dv_list.tqe_next) {
2684
2685 /* we are only interested in disks... */
2686 if (dv->dv_class != DV_DISK)
2687 continue;
2688
2689 /* we don't care about floppies... */
2690 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2691 continue;
2692 }
2693
2694 /* need to find the device_name_to_block_device_major stuff */
2695 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2696 dtobdm = dev_name2blk;
2697 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2698 dtobdm++;
2699 }
if (dtobdm->d_name == NULL) {
/* this device has no entry in dev_name2blk; skip it */
continue;
}
2700
2701 /* get a vnode for the raw partition of this disk */
2702
2703 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2704 if (bdevvp(dev, &vp))
2705 panic("RAID can't alloc vnode");
2706
2707 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2708
2709 if (error) {
2710 /* "Who cares." Continue looking
2711 for something that exists. */
2712 vput(vp);
2713 continue;
2714 }
2715
2716 /* Ok, the disk exists. Go get the disklabel. */
2717 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2718 FREAD, NOCRED, 0);
2719 if (error) {
2720 /*
2721 * XXX can't happen - open() would
2722 * have errored out (or faked one up)
2723 */
2724 printf("can't get label for dev %s%c (%d)!?!?\n",
2725 dv->dv_xname, 'a' + RAW_PART, error);
2726 }
2727
2728 /* don't need this any more. We'll allocate it again
2729 a little later if we really do... */
2730 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2731 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2732 vput(vp);
2733
2734 for (i=0; i < label.d_npartitions; i++) {
2735 /* We only support partitions marked as RAID */
2736 if (label.d_partitions[i].p_fstype != FS_RAID)
2737 continue;
2738
2739 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2740 if (bdevvp(dev, &vp))
2741 panic("RAID can't alloc vnode");
2742
2743 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2744 if (error) {
2745 /* Whatever... */
2746 vput(vp);
2747 continue;
2748 }
2749
2750 good_one = 0;
2751
2752 clabel = (RF_ComponentLabel_t *)
2753 malloc(sizeof(RF_ComponentLabel_t),
2754 M_RAIDFRAME, M_NOWAIT);
2755 if (clabel == NULL) {
2756 /* XXX CLEANUP HERE */
2757 printf("RAID auto config: out of memory!\n");
2758 return(NULL); /* XXX probably should panic? */
2759 }
2760
2761 if (!raidread_component_label(dev, vp, clabel)) {
2762 /* Got the label. Does it look reasonable? */
2763 if (rf_reasonable_label(clabel) &&
2764 (clabel->partitionSize <=
2765 label.d_partitions[i].p_size)) {
2766 #if DEBUG
2767 printf("Component on: %s%c: %d\n",
2768 dv->dv_xname, 'a'+i,
2769 label.d_partitions[i].p_size);
2770 rf_print_component_label(clabel);
2771 #endif
2772 /* if it's reasonable, add it,
2773 else ignore it. */
2774 ac = (RF_AutoConfig_t *)
2775 malloc(sizeof(RF_AutoConfig_t),
2776 M_RAIDFRAME,
2777 M_NOWAIT);
2778 if (ac == NULL) {
2779 /* XXX should panic?? */
2780 return(NULL);
2781 }
2782
2783 sprintf(ac->devname, "%s%c",
2784 dv->dv_xname, 'a'+i);
2785 ac->dev = dev;
2786 ac->vp = vp;
2787 ac->clabel = clabel;
2788 ac->next = ac_list;
2789 ac_list = ac;
2790 good_one = 1;
2791 }
2792 }
2793 if (!good_one) {
2794 /* cleanup */
2795 free(clabel, M_RAIDFRAME);
2796 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2797 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2798 vput(vp);
2799 }
2800 }
2801 }
2802 return(ac_list);
2803 }
2804
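/*
 * Basic sanity checks on a component label: known version, sane clean
 * flag, row/column within bounds, and non-zero block size and count.
 */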
2805 static int
2806 rf_reasonable_label(clabel)
2807 RF_ComponentLabel_t *clabel;
2808 {
2809
2810 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2811 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2812 ((clabel->clean == RF_RAID_CLEAN) ||
2813 (clabel->clean == RF_RAID_DIRTY)) &&
2814 clabel->row >=0 &&
2815 clabel->column >= 0 &&
2816 clabel->num_rows > 0 &&
2817 clabel->num_columns > 0 &&
2818 clabel->row < clabel->num_rows &&
2819 clabel->column < clabel->num_columns &&
2820 clabel->blockSize > 0 &&
2821 clabel->numBlocks > 0) {
2822 /* label looks reasonable enough... */
2823 return(1);
2824 }
2825 return(0);
2826 }
2827
2828
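/*
 * Print the contents of a component label to the console.
 */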
2829 void
2830 rf_print_component_label(clabel)
2831 RF_ComponentLabel_t *clabel;
2832 {
2833 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2834 clabel->row, clabel->column,
2835 clabel->num_rows, clabel->num_columns);
2836 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2837 clabel->version, clabel->serial_number,
2838 clabel->mod_counter);
2839 printf(" Clean: %s Status: %d\n",
2840 clabel->clean ? "Yes" : "No", clabel->status );
2841 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2842 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2843 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2844 (char) clabel->parityConfig, clabel->blockSize,
2845 clabel->numBlocks);
2846 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2847 printf(" Contains root partition: %s\n",
2848 clabel->root_partition ? "Yes" : "No" );
2849 printf(" Last configured as: raid%d\n", clabel->last_unit );
2850 #if 0
2851 printf(" Config order: %d\n", clabel->config_order);
2852 #endif
2853
2854 }
2855
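/*
 * Group the auto-configured components into configuration sets: a
 * component joins the first existing set it fits (per rf_does_it_fit());
 * otherwise a new set is created for it.
 */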
2856 RF_ConfigSet_t *
2857 rf_create_auto_sets(ac_list)
2858 RF_AutoConfig_t *ac_list;
2859 {
2860 RF_AutoConfig_t *ac;
2861 RF_ConfigSet_t *config_sets;
2862 RF_ConfigSet_t *cset;
2863 RF_AutoConfig_t *ac_next;
2864
2865
2866 config_sets = NULL;
2867
2868 /* Go through the AutoConfig list, and figure out which components
2869 belong to what sets. */
2870 ac = ac_list;
2871 while(ac!=NULL) {
2872 /* we're going to putz with ac->next, so save it here
2873 for use at the end of the loop */
2874 ac_next = ac->next;
2875
2876 if (config_sets == NULL) {
2877 /* will need at least this one... */
2878 config_sets = (RF_ConfigSet_t *)
2879 malloc(sizeof(RF_ConfigSet_t),
2880 M_RAIDFRAME, M_NOWAIT);
2881 if (config_sets == NULL) {
2882 panic("rf_create_auto_sets: No memory!\n");
2883 }
2884 /* this one is easy :) */
2885 config_sets->ac = ac;
2886 config_sets->next = NULL;
2887 config_sets->rootable = 0;
2888 ac->next = NULL;
2889 } else {
2890 /* which set does this component fit into? */
2891 cset = config_sets;
2892 while(cset!=NULL) {
2893 if (rf_does_it_fit(cset, ac)) {
2894 /* looks like it matches... */
2895 ac->next = cset->ac;
2896 cset->ac = ac;
2897 break;
2898 }
2899 cset = cset->next;
2900 }
2901 if (cset==NULL) {
2902 /* didn't find a match above... new set..*/
2903 cset = (RF_ConfigSet_t *)
2904 malloc(sizeof(RF_ConfigSet_t),
2905 M_RAIDFRAME, M_NOWAIT);
2906 if (cset == NULL) {
2907 panic("rf_create_auto_sets: No memory!\n");
2908 }
2909 cset->ac = ac;
2910 ac->next = NULL;
2911 cset->next = config_sets;
2912 cset->rootable = 0;
2913 config_sets = cset;
2914 }
2915 }
2916 ac = ac_next;
2917 }
2918
2919
2920 return(config_sets);
2921 }
2922
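/*
 * Decide whether a component belongs to the given configuration set by
 * comparing its label against the first label in the set.
 */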
2923 static int
2924 rf_does_it_fit(cset, ac)
2925 RF_ConfigSet_t *cset;
2926 RF_AutoConfig_t *ac;
2927 {
2928 RF_ComponentLabel_t *clabel1, *clabel2;
2929
2930 /* If this one matches the *first* one in the set, that's good
2931 enough, since the other members of the set would have been
2932 through here too... */
2933 /* note that we are not checking partitionSize here..
2934
2935 Note that we are also not checking the mod_counters here.
2936 If everything else matches except the mod_counter, that's
2937 good enough for this test. We will deal with the mod_counters
2938 a little later in the autoconfiguration process.
2939
2940 (clabel1->mod_counter == clabel2->mod_counter) &&
2941
2942 The reason we don't check for this is that failed disks
2943 will have lower modification counts. If those disks are
2944 not added to the set they used to belong to, then they will
2945 form their own set, which may result in 2 different sets,
2946 for example, competing to be configured at raid0, and
2947 perhaps competing to be the root filesystem set. If the
2948 wrong ones get configured, or both attempt to become /,
2949 weird behaviour and/or serious lossage will occur. Thus we
2950 need to bring them into the fold here, and kick them out at
2951 a later point.
2952
2953 */
2954
2955 clabel1 = cset->ac->clabel;
2956 clabel2 = ac->clabel;
2957 if ((clabel1->version == clabel2->version) &&
2958 (clabel1->serial_number == clabel2->serial_number) &&
2959 (clabel1->num_rows == clabel2->num_rows) &&
2960 (clabel1->num_columns == clabel2->num_columns) &&
2961 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2962 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2963 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2964 (clabel1->parityConfig == clabel2->parityConfig) &&
2965 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2966 (clabel1->blockSize == clabel2->blockSize) &&
2967 (clabel1->numBlocks == clabel2->numBlocks) &&
2968 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2969 (clabel1->root_partition == clabel2->root_partition) &&
2970 (clabel1->last_unit == clabel2->last_unit) &&
2971 (clabel1->config_order == clabel2->config_order)) {
2972 /* if it gets here, it almost *has* to be a match */
2973 } else {
2974 /* it's not consistent with somebody in the set..
2975 punt */
2976 return(0);
2977 }
2978 /* all was fine.. it must fit... */
2979 return(1);
2980 }
2981
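/*
 * Determine whether a configuration set has enough live components
 * (with the right modification counter) to be configured, given its
 * parity configuration. RAID 1 pairs get special-case handling.
 */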
2982 int
2983 rf_have_enough_components(cset)
2984 RF_ConfigSet_t *cset;
2985 {
2986 RF_AutoConfig_t *ac;
2987 RF_AutoConfig_t *auto_config;
2988 RF_ComponentLabel_t *clabel;
2989 int r,c;
2990 int num_rows;
2991 int num_cols;
2992 int num_missing;
2993 int mod_counter;
2994 int mod_counter_found;
2995 int even_pair_failed;
2996 char parity_type;
2997
2998
2999 /* check to see that we have enough 'live' components
3000 of this set. If so, we can configure it if necessary */
3001
3002 num_rows = cset->ac->clabel->num_rows;
3003 num_cols = cset->ac->clabel->num_columns;
3004 parity_type = cset->ac->clabel->parityConfig;
3005
3006 /* XXX Check for duplicate components!?!?!? */
3007
3008 /* Determine what the mod_counter is supposed to be for this set. */
3009
3010 mod_counter_found = 0;
3011 mod_counter = 0;
3012 ac = cset->ac;
3013 while(ac!=NULL) {
3014 if (mod_counter_found==0) {
3015 mod_counter = ac->clabel->mod_counter;
3016 mod_counter_found = 1;
3017 } else {
3018 if (ac->clabel->mod_counter > mod_counter) {
3019 mod_counter = ac->clabel->mod_counter;
3020 }
3021 }
3022 ac = ac->next;
3023 }
3024
3025 num_missing = 0;
3026 auto_config = cset->ac;
3027
3028 for(r=0; r<num_rows; r++) {
3029 even_pair_failed = 0;
3030 for(c=0; c<num_cols; c++) {
3031 ac = auto_config;
3032 while(ac!=NULL) {
3033 if ((ac->clabel->row == r) &&
3034 (ac->clabel->column == c) &&
3035 (ac->clabel->mod_counter == mod_counter)) {
3036 /* it's this one... */
3037 #if DEBUG
3038 printf("Found: %s at %d,%d\n",
3039 ac->devname,r,c);
3040 #endif
3041 break;
3042 }
3043 ac=ac->next;
3044 }
3045 if (ac==NULL) {
3046 /* Didn't find one here! */
3047 /* special case for RAID 1, especially
3048 where there are more than 2
3049 components (where RAIDframe treats
3050 things a little differently :( ) */
3051 if (parity_type == '1') {
3052 if (c%2 == 0) { /* even component */
3053 even_pair_failed = 1;
3054 } else { /* odd component. If
3055 we're failed, and
3056 so is the even
3057 component, it's
3058 "Good Night, Charlie" */
3059 if (even_pair_failed == 1) {
3060 return(0);
3061 }
3062 }
3063 } else {
3064 /* normal accounting */
3065 num_missing++;
3066 }
3067 }
3068 if ((parity_type == '1') && (c%2 == 1)) {
3069 /* Just finished the odd component of a pair
3070 without bailing.. reset the even_pair_failed
3071 flag and go on to the next pair.... */
3072 even_pair_failed = 0;
3073 }
3074 }
3075 }
3076
3077 clabel = cset->ac->clabel;
3078
3079 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3080 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3081 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3082 /* XXX this needs to be made *much* more general */
3083 /* Too many failures */
3084 return(0);
3085 }
3086 /* otherwise, all is well, and we've got enough to take a kick
3087 at autoconfiguring this set */
3088 return(1);
3089 }
3090
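/*
 * Fill in an RF_Config_t for this set from the information stored in
 * the component labels.
 */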
3091 void
3092 rf_create_configuration(ac,config,raidPtr)
3093 RF_AutoConfig_t *ac;
3094 RF_Config_t *config;
3095 RF_Raid_t *raidPtr;
3096 {
3097 RF_ComponentLabel_t *clabel;
3098 int i;
3099
3100 clabel = ac->clabel;
3101
3102 /* 1. Fill in the common stuff */
3103 config->numRow = clabel->num_rows;
3104 config->numCol = clabel->num_columns;
3105 config->numSpare = 0; /* XXX should this be set here? */
3106 config->sectPerSU = clabel->sectPerSU;
3107 config->SUsPerPU = clabel->SUsPerPU;
3108 config->SUsPerRU = clabel->SUsPerRU;
3109 config->parityConfig = clabel->parityConfig;
3110 /* XXX... */
3111 strcpy(config->diskQueueType,"fifo");
3112 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3113 config->layoutSpecificSize = 0; /* XXX ?? */
3114
3115 while(ac!=NULL) {
3116 /* row/col values will be in range due to the checks
3117 in rf_reasonable_label() */
3118 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3119 ac->devname);
3120 ac = ac->next;
3121 }
3122
3123 for(i=0;i<RF_MAXDBGV;i++) {
3124 config->debugVars[i][0] = '\0';
3125 }
3126 }
3127
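/*
 * Set the autoconfigure flag on the raid set and propagate it into the
 * component label of every optimal component. Returns the new value.
 */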
3128 int
3129 rf_set_autoconfig(raidPtr, new_value)
3130 RF_Raid_t *raidPtr;
3131 int new_value;
3132 {
3133 RF_ComponentLabel_t clabel;
3134 struct vnode *vp;
3135 dev_t dev;
3136 int row, column;
3137
3138 raidPtr->autoconfigure = new_value;
3139 for(row=0; row<raidPtr->numRow; row++) {
3140 for(column=0; column<raidPtr->numCol; column++) {
3141 if (raidPtr->Disks[row][column].status ==
3142 rf_ds_optimal) {
3143 dev = raidPtr->Disks[row][column].dev;
3144 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3145 raidread_component_label(dev, vp, &clabel);
3146 clabel.autoconfigure = new_value;
3147 raidwrite_component_label(dev, vp, &clabel);
3148 }
3149 }
3150 }
3151 return(new_value);
3152 }
3153
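/*
 * Set the root-partition flag on the raid set and propagate it into the
 * component label of every optimal component. Returns the new value.
 */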
3154 int
3155 rf_set_rootpartition(raidPtr, new_value)
3156 RF_Raid_t *raidPtr;
3157 int new_value;
3158 {
3159 RF_ComponentLabel_t clabel;
3160 struct vnode *vp;
3161 dev_t dev;
3162 int row, column;
3163
3164 raidPtr->root_partition = new_value;
3165 for(row=0; row<raidPtr->numRow; row++) {
3166 for(column=0; column<raidPtr->numCol; column++) {
3167 if (raidPtr->Disks[row][column].status ==
3168 rf_ds_optimal) {
3169 dev = raidPtr->Disks[row][column].dev;
3170 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3171 raidread_component_label(dev, vp, &clabel);
3172 clabel.root_partition = new_value;
3173 raidwrite_component_label(dev, vp, &clabel);
3174 }
3175 }
3176 }
3177 return(new_value);
3178 }
3179
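/*
 * Close and release the vnodes held open for every component in a
 * configuration set.
 */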
3180 void
3181 rf_release_all_vps(cset)
3182 RF_ConfigSet_t *cset;
3183 {
3184 RF_AutoConfig_t *ac;
3185
3186 ac = cset->ac;
3187 while(ac!=NULL) {
3188 /* Close the vp, and give it back */
3189 if (ac->vp) {
3190 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3191 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3192 vput(ac->vp);
3193 ac->vp = NULL;
3194 }
3195 ac = ac->next;
3196 }
3197 }
3198
3199
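/*
 * Free the component labels and RF_AutoConfig_t entries of a
 * configuration set, then free the set itself.
 */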
3200 void
3201 rf_cleanup_config_set(cset)
3202 RF_ConfigSet_t *cset;
3203 {
3204 RF_AutoConfig_t *ac;
3205 RF_AutoConfig_t *next_ac;
3206
3207 ac = cset->ac;
3208 while(ac!=NULL) {
3209 next_ac = ac->next;
3210 /* nuke the label */
3211 free(ac->clabel, M_RAIDFRAME);
3212 /* cleanup the config structure */
3213 free(ac, M_RAIDFRAME);
3214 /* "next.." */
3215 ac = next_ac;
3216 }
3217 /* and, finally, nuke the config set */
3218 free(cset, M_RAIDFRAME);
3219 }
3220
3221
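/*
 * Initialize a component label from the current state of the raid set.
 * Per-component fields (row, column) are left for the caller to fill
 * in.
 */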
3222 void
3223 raid_init_component_label(raidPtr, clabel)
3224 RF_Raid_t *raidPtr;
3225 RF_ComponentLabel_t *clabel;
3226 {
3227 /* current version number */
3228 clabel->version = RF_COMPONENT_LABEL_VERSION;
3229 clabel->serial_number = raidPtr->serial_number;
3230 clabel->mod_counter = raidPtr->mod_counter;
3231 clabel->num_rows = raidPtr->numRow;
3232 clabel->num_columns = raidPtr->numCol;
3233 clabel->clean = RF_RAID_DIRTY; /* not clean */
3234 clabel->status = rf_ds_optimal; /* "It's good!" */
3235
3236 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3237 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3238 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3239
3240 clabel->blockSize = raidPtr->bytesPerSector;
3241 clabel->numBlocks = raidPtr->sectorsPerDisk;
3242
3243 /* XXX not portable */
3244 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3245 clabel->maxOutstanding = raidPtr->maxOutstanding;
3246 clabel->autoconfigure = raidPtr->autoconfigure;
3247 clabel->root_partition = raidPtr->root_partition;
3248 clabel->last_unit = raidPtr->raidid;
3249 clabel->config_order = raidPtr->config_order;
3250 }
3251
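/*
 * Configure an auto-detected set: pick a raid unit (preferring the
 * unit it was last configured as), build an RF_Config_t from the
 * component labels, and run rf_Configure()/raidinit(). The chosen unit
 * number is returned via *unit.
 */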
3252 int
3253 rf_auto_config_set(cset,unit)
3254 RF_ConfigSet_t *cset;
3255 int *unit;
3256 {
3257 RF_Raid_t *raidPtr;
3258 RF_Config_t *config;
3259 int raidID;
3260 int retcode;
3261
3262 printf("RAID autoconfigure\n");
3263
3264 retcode = 0;
3265 *unit = -1;
3266
3267 /* 1. Create a config structure */
3268
3269 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3270 M_RAIDFRAME,
3271 M_NOWAIT);
3272 if (config==NULL) {
3273 printf("Out of mem!?!?\n");
3274 /* XXX do something more intelligent here. */
3275 return(1);
3276 }
3277
3278 memset(config, 0, sizeof(RF_Config_t));
3279
3280 /* XXX raidID needs to be set correctly.. */
3281
3282 /*
3283 2. Figure out what RAID ID this one is supposed to live at
3284 See if we can get the same RAID dev that it was configured
3285 on last time..
3286 */
3287
3288 raidID = cset->ac->clabel->last_unit;
3289 if ((raidID < 0) || (raidID >= numraid)) {
3290 /* let's not wander off into lala land. */
3291 raidID = numraid - 1;
3292 }
3293 if (raidPtrs[raidID]->valid != 0) {
3294
3295 /*
3296 Nope... Go looking for an alternative...
3297 Start high so we don't immediately use raid0 if that's
3298 not taken.
3299 */
3300
3301 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3302 if (raidPtrs[raidID]->valid == 0) {
3303 /* can use this one! */
3304 break;
3305 }
3306 }
3307 }
3308
3309 if (raidID < 0) {
3310 /* punt... */
3311 printf("Unable to auto configure this set!\n");
3312 printf("(Out of RAID devs!)\n");
3313 return(1);
3314 }
3315 printf("Configuring raid%d:\n",raidID);
3316 raidPtr = raidPtrs[raidID];
3317
3318 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3319 raidPtr->raidid = raidID;
3320 raidPtr->openings = RAIDOUTSTANDING;
3321
3322 /* 3. Build the configuration structure */
3323 rf_create_configuration(cset->ac, config, raidPtr);
3324
3325 /* 4. Do the configuration */
3326 retcode = rf_Configure(raidPtr, config, cset->ac);
3327
3328 if (retcode == 0) {
3329
3330 raidinit(raidPtrs[raidID]);
3331
3332 rf_markalldirty(raidPtrs[raidID]);
3333 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3334 if (cset->ac->clabel->root_partition==1) {
3335 /* everything configured just fine. Make a note
3336 that this set is eligible to be root. */
3337 cset->rootable = 1;
3338 /* XXX do this here? */
3339 raidPtrs[raidID]->root_partition = 1;
3340 }
3341 }
3342
3343 /* 5. Cleanup */
3344 free(config, M_RAIDFRAME);
3345
3346 *unit = raidID;
3347 return(retcode);
3348 }
3349
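/*
 * Report completion of the I/O associated with this access descriptor
 * to the disk statistics layer via disk_unbusy().
 */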
3350 void
3351 rf_disk_unbusy(desc)
3352 RF_RaidAccessDesc_t *desc;
3353 {
3354 struct buf *bp;
3355
3356 bp = (struct buf *)desc->bp;
3357 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3358 (bp->b_bcount - bp->b_resid));
3359 }
3360