1 /*	$NetBSD: rf_netbsdkintf.c,v 1.135 2002/09/22 03:46:40 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.135 2002/09/22 03:46:40 oster Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 #ifdef DEBUG
157 int rf_kdebug_level = 0;
 158 #define db1_printf(a) do { if (rf_kdebug_level > 0) printf a ; } while (0)
159 #else /* DEBUG */
 160 #define db1_printf(a) do { } while (0)
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit(RF_Raid_t *);
180
181 void raidattach(int);
182
183 dev_type_open(raidopen);
184 dev_type_close(raidclose);
185 dev_type_read(raidread);
186 dev_type_write(raidwrite);
187 dev_type_ioctl(raidioctl);
188 dev_type_strategy(raidstrategy);
189 dev_type_dump(raiddump);
190 dev_type_size(raidsize);
191
192 const struct bdevsw raid_bdevsw = {
193 raidopen, raidclose, raidstrategy, raidioctl,
194 raiddump, raidsize, D_DISK
195 };
196
197 const struct cdevsw raid_cdevsw = {
198 raidopen, raidclose, raidread, raidwrite, raidioctl,
199 nostop, notty, nopoll, nommap, D_DISK
200 };
201
202 /*
203 * Pilfered from ccd.c
204 */
205
206 struct raidbuf {
207 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
208 struct buf *rf_obp; /* ptr. to original I/O buf */
209 RF_DiskQueueData_t *req;/* the request that this was part of.. */
210 };
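/*
 * KernelWakeupFunc() recovers this structure from a completed component
 * I/O by casting the struct buf * it is handed back to a struct raidbuf *,
 * which is why rf_buf must be the first member above.
 */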
211
212 /* component buffer pool */
213 struct pool raidframe_cbufpool;
214
215 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
216 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
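/*
 * The "rs" argument to these macros is unused; both operate on the single
 * shared component buffer pool.  Since RAIDGETBUF() passes PR_NOWAIT,
 * pool_get() may return NULL under memory pressure.
 */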
217
218 /* XXX Not sure if the following should be replacing the raidPtrs above,
219 or if it should be used in conjunction with that...
220 */
221
222 struct raid_softc {
223 int sc_flags; /* flags */
224 int sc_cflags; /* configuration flags */
225 size_t sc_size; /* size of the raid device */
226 char sc_xname[20]; /* XXX external name */
227 struct disk sc_dkdev; /* generic disk device info */
228 struct bufq_state buf_queue; /* used for the device queue */
229 };
230 /* sc_flags */
231 #define RAIDF_INITED 0x01 /* unit has been initialized */
232 #define RAIDF_WLABEL 0x02 /* label area is writable */
233 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
234 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
235 #define RAIDF_LOCKED 0x80 /* unit is locked */
236
237 #define raidunit(x) DISKUNIT(x)
238 int numraid = 0;
239
240 /*
241 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
242 * Be aware that large numbers can allow the driver to consume a lot of
243 * kernel memory, especially on writes, and in degraded mode reads.
244 *
245 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
246 * a single 64K write will typically require 64K for the old data,
247 * 64K for the old parity, and 64K for the new parity, for a total
248 * of 192K (if the parity buffer is not re-used immediately).
 249  * Even if it is used immediately, that's still 128K, which when multiplied
250 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
251 *
252 * Now in degraded mode, for example, a 64K read on the above setup may
253 * require data reconstruction, which will require *all* of the 4 remaining
254 * disks to participate -- 4 * 32K/disk == 128K again.
255 */
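/*
 * As a rough rule of thumb from the example above, each outstanding small
 * write can tie up two to three times its own size in component buffers,
 * on top of the incoming data itself, so the kernel memory used for
 * buffering scales with RAIDOUTSTANDING.
 */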
256
257 #ifndef RAIDOUTSTANDING
258 #define RAIDOUTSTANDING 6
259 #endif
260
261 #define RAIDLABELDEV(dev) \
262 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
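/*
 * RAIDLABELDEV() maps any raid dev_t onto the raw partition of the same
 * unit; it is used below wherever the on-disk label is accessed
 * (e.g. writedisklabel() in the DIOCWDINFO case).
 */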
263
264 /* declared here, and made public, for the benefit of KVM stuff.. */
265 struct raid_softc *raid_softc;
266
267 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
268 struct disklabel *);
269 static void raidgetdisklabel(dev_t);
270 static void raidmakedisklabel(struct raid_softc *);
271
272 static int raidlock(struct raid_softc *);
273 static void raidunlock(struct raid_softc *);
274
275 static void rf_markalldirty(RF_Raid_t *);
276
277 struct device *raidrootdev;
278
279 void rf_ReconThread(struct rf_recon_req *);
280 /* XXX what I want is: */
281 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
282 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
283 void rf_CopybackThread(RF_Raid_t *raidPtr);
284 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
285 void rf_buildroothack(void *);
286
287 RF_AutoConfig_t *rf_find_raid_components(void);
288 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
289 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
290 static int rf_reasonable_label(RF_ComponentLabel_t *);
291 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
292 int rf_set_autoconfig(RF_Raid_t *, int);
293 int rf_set_rootpartition(RF_Raid_t *, int);
294 void rf_release_all_vps(RF_ConfigSet_t *);
295 void rf_cleanup_config_set(RF_ConfigSet_t *);
296 int rf_have_enough_components(RF_ConfigSet_t *);
297 int rf_auto_config_set(RF_ConfigSet_t *, int *);
298
299 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
300 allow autoconfig to take place.
301 Note that this is overridden by having
302 RAID_AUTOCONFIG as an option in the
303 kernel config file. */
304
305 void
306 raidattach(num)
307 int num;
308 {
309 int raidID;
310 int i, rc;
311 RF_AutoConfig_t *ac_list; /* autoconfig list */
312 RF_ConfigSet_t *config_sets;
313
314 #ifdef DEBUG
315 printf("raidattach: Asked for %d units\n", num);
316 #endif
317
318 if (num <= 0) {
319 #ifdef DIAGNOSTIC
320 panic("raidattach: count <= 0");
321 #endif
322 return;
323 }
324 /* This is where all the initialization stuff gets done. */
325
326 numraid = num;
327
328 /* Make some space for requested number of units... */
329
330 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
331 if (raidPtrs == NULL) {
332 panic("raidPtrs is NULL!!\n");
333 }
334
335 /* Initialize the component buffer pool. */
336 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
337 0, 0, "raidpl", NULL);
338
339 rc = rf_mutex_init(&rf_sparet_wait_mutex);
340 if (rc) {
341 RF_PANIC();
342 }
343
344 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
345
346 for (i = 0; i < num; i++)
347 raidPtrs[i] = NULL;
348 rc = rf_BootRaidframe();
349 if (rc == 0)
350 printf("Kernelized RAIDframe activated\n");
351 else
352 panic("Serious error booting RAID!!\n");
353
 354 	/* put together some data structures like the CCD device does.. This
355 * lets us lock the device and what-not when it gets opened. */
356
357 raid_softc = (struct raid_softc *)
358 malloc(num * sizeof(struct raid_softc),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raid_softc == NULL) {
361 printf("WARNING: no memory for RAIDframe driver\n");
362 return;
363 }
364
365 memset(raid_softc, 0, num * sizeof(struct raid_softc));
366
367 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
368 M_RAIDFRAME, M_NOWAIT);
369 if (raidrootdev == NULL) {
370 panic("No memory for RAIDframe driver!!?!?!\n");
371 }
372
373 for (raidID = 0; raidID < num; raidID++) {
374 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
375
376 raidrootdev[raidID].dv_class = DV_DISK;
377 raidrootdev[raidID].dv_cfdata = NULL;
378 raidrootdev[raidID].dv_unit = raidID;
379 raidrootdev[raidID].dv_parent = NULL;
380 raidrootdev[raidID].dv_flags = 0;
381 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
382
383 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
384 (RF_Raid_t *));
385 if (raidPtrs[raidID] == NULL) {
386 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
387 numraid = raidID;
388 return;
389 }
390 }
391
392 #ifdef RAID_AUTOCONFIG
393 raidautoconfig = 1;
394 #endif
395
396 if (raidautoconfig) {
397 /* 1. locate all RAID components on the system */
398
399 #if DEBUG
400 printf("Searching for raid components...\n");
401 #endif
402 ac_list = rf_find_raid_components();
403
404 /* 2. sort them into their respective sets */
405
406 config_sets = rf_create_auto_sets(ac_list);
407
408 /* 3. evaluate each set and configure the valid ones
409 This gets done in rf_buildroothack() */
410
411 /* schedule the creation of the thread to do the
412 "/ on RAID" stuff */
413
414 kthread_create(rf_buildroothack,config_sets);
415
416 }
417
418 }
419
420 void
421 rf_buildroothack(arg)
422 void *arg;
423 {
424 RF_ConfigSet_t *config_sets = arg;
425 RF_ConfigSet_t *cset;
426 RF_ConfigSet_t *next_cset;
427 int retcode;
428 int raidID;
429 int rootID;
430 int num_root;
431
432 rootID = 0;
433 num_root = 0;
434 cset = config_sets;
435 while(cset != NULL ) {
436 next_cset = cset->next;
437 if (rf_have_enough_components(cset) &&
438 cset->ac->clabel->autoconfigure==1) {
439 retcode = rf_auto_config_set(cset,&raidID);
440 if (!retcode) {
441 if (cset->rootable) {
442 rootID = raidID;
443 num_root++;
444 }
445 } else {
446 /* The autoconfig didn't work :( */
447 #if DEBUG
448 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
449 #endif
450 rf_release_all_vps(cset);
451 }
452 } else {
453 /* we're not autoconfiguring this set...
454 release the associated resources */
455 rf_release_all_vps(cset);
456 }
457 /* cleanup */
458 rf_cleanup_config_set(cset);
459 cset = next_cset;
460 }
461
462 /* we found something bootable... */
463
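	/* If no set claimed root (num_root == 0), booted_device is left
	   alone and the kernel falls back to its usual root selection. */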
464 if (num_root == 1) {
465 booted_device = &raidrootdev[rootID];
466 } else if (num_root > 1) {
467 /* we can't guess.. require the user to answer... */
468 boothowto |= RB_ASKNAME;
469 }
470 }
471
472
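/*
 * raidsize() returns the size (in DEV_BSIZE blocks) of the given partition
 * if it is marked FS_SWAP, and -1 otherwise.  If the partition is not
 * already open, it is opened and closed around the label lookup.
 */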
473 int
474 raidsize(dev)
475 dev_t dev;
476 {
477 struct raid_softc *rs;
478 struct disklabel *lp;
479 int part, unit, omask, size;
480
481 unit = raidunit(dev);
482 if (unit >= numraid)
483 return (-1);
484 rs = &raid_softc[unit];
485
486 if ((rs->sc_flags & RAIDF_INITED) == 0)
487 return (-1);
488
489 part = DISKPART(dev);
490 omask = rs->sc_dkdev.dk_openmask & (1 << part);
491 lp = rs->sc_dkdev.dk_label;
492
493 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
494 return (-1);
495
496 if (lp->d_partitions[part].p_fstype != FS_SWAP)
497 size = -1;
498 else
499 size = lp->d_partitions[part].p_size *
500 (lp->d_secsize / DEV_BSIZE);
501
502 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
503 return (-1);
504
505 return (size);
506
507 }
508
509 int
510 raiddump(dev, blkno, va, size)
511 dev_t dev;
512 daddr_t blkno;
513 caddr_t va;
514 size_t size;
515 {
516 /* Not implemented. */
517 return ENXIO;
518 }
519 /* ARGSUSED */
520 int
521 raidopen(dev, flags, fmt, p)
522 dev_t dev;
523 int flags, fmt;
524 struct proc *p;
525 {
526 int unit = raidunit(dev);
527 struct raid_softc *rs;
528 struct disklabel *lp;
529 int part, pmask;
530 int error = 0;
531
532 if (unit >= numraid)
533 return (ENXIO);
534 rs = &raid_softc[unit];
535
536 if ((error = raidlock(rs)) != 0)
537 return (error);
538 lp = rs->sc_dkdev.dk_label;
539
540 part = DISKPART(dev);
541 pmask = (1 << part);
542
543 db1_printf(("Opening raid device number: %d partition: %d\n",
544 unit, part));
545
546
547 if ((rs->sc_flags & RAIDF_INITED) &&
548 (rs->sc_dkdev.dk_openmask == 0))
549 raidgetdisklabel(dev);
550
551 /* make sure that this partition exists */
552
553 if (part != RAW_PART) {
554 db1_printf(("Not a raw partition..\n"));
555 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
556 ((part >= lp->d_npartitions) ||
557 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
558 error = ENXIO;
559 raidunlock(rs);
560 db1_printf(("Bailing out...\n"));
561 return (error);
562 }
563 }
564 /* Prevent this unit from being unconfigured while open. */
565 switch (fmt) {
566 case S_IFCHR:
567 rs->sc_dkdev.dk_copenmask |= pmask;
568 break;
569
570 case S_IFBLK:
571 rs->sc_dkdev.dk_bopenmask |= pmask;
572 break;
573 }
574
575 if ((rs->sc_dkdev.dk_openmask == 0) &&
576 ((rs->sc_flags & RAIDF_INITED) != 0)) {
577 /* First one... mark things as dirty... Note that we *MUST*
578 have done a configure before this. I DO NOT WANT TO BE
579 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
580 THAT THEY BELONG TOGETHER!!!!! */
581 /* XXX should check to see if we're only open for reading
582 here... If so, we needn't do this, but then need some
583 other way of keeping track of what's happened.. */
584
585 rf_markalldirty( raidPtrs[unit] );
586 }
587
588
589 rs->sc_dkdev.dk_openmask =
590 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
591
592 raidunlock(rs);
593
594 return (error);
595
596
597 }
598 /* ARGSUSED */
599 int
600 raidclose(dev, flags, fmt, p)
601 dev_t dev;
602 int flags, fmt;
603 struct proc *p;
604 {
605 int unit = raidunit(dev);
606 struct raid_softc *rs;
607 int error = 0;
608 int part;
609
610 if (unit >= numraid)
611 return (ENXIO);
612 rs = &raid_softc[unit];
613
614 if ((error = raidlock(rs)) != 0)
615 return (error);
616
617 part = DISKPART(dev);
618
619 /* ...that much closer to allowing unconfiguration... */
620 switch (fmt) {
621 case S_IFCHR:
622 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
623 break;
624
625 case S_IFBLK:
626 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
627 break;
628 }
629 rs->sc_dkdev.dk_openmask =
630 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
631
632 if ((rs->sc_dkdev.dk_openmask == 0) &&
633 ((rs->sc_flags & RAIDF_INITED) != 0)) {
 634 		/* Last one... the device is not unconfigured yet.
 635 		   Mark things as clean here; if RAIDF_INITED had
 636 		   already been cleared, device shutdown would have
 637 		   taken care of setting the clean bits. */
638 #if 0
639 printf("Last one on raid%d. Updating status.\n",unit);
640 #endif
641 rf_update_component_labels(raidPtrs[unit],
642 RF_FINAL_COMPONENT_UPDATE);
643 if (doing_shutdown) {
644 /* last one, and we're going down, so
645 lights out for this RAID set too. */
646 error = rf_Shutdown(raidPtrs[unit]);
647
648 /* It's no longer initialized... */
649 rs->sc_flags &= ~RAIDF_INITED;
650
651 /* Detach the disk. */
652 disk_detach(&rs->sc_dkdev);
653 }
654 }
655
656 raidunlock(rs);
657 return (0);
658
659 }
660
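/*
 * raidstrategy() validates the unit and, unless the request is on the raw
 * partition, bounds-checks it against the disklabel; valid bufs are queued
 * on the per-unit buf_queue and raidstart() is kicked at splbio().
 */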
661 void
662 raidstrategy(bp)
663 struct buf *bp;
664 {
665 int s;
666
667 unsigned int raidID = raidunit(bp->b_dev);
668 RF_Raid_t *raidPtr;
669 struct raid_softc *rs = &raid_softc[raidID];
670 struct disklabel *lp;
671 int wlabel;
672
673 if ((rs->sc_flags & RAIDF_INITED) ==0) {
674 bp->b_error = ENXIO;
675 bp->b_flags |= B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if (raidID >= numraid || !raidPtrs[raidID]) {
681 bp->b_error = ENODEV;
682 bp->b_flags |= B_ERROR;
683 bp->b_resid = bp->b_bcount;
684 biodone(bp);
685 return;
686 }
687 raidPtr = raidPtrs[raidID];
688 if (!raidPtr->valid) {
689 bp->b_error = ENODEV;
690 bp->b_flags |= B_ERROR;
691 bp->b_resid = bp->b_bcount;
692 biodone(bp);
693 return;
694 }
695 if (bp->b_bcount == 0) {
696 db1_printf(("b_bcount is zero..\n"));
697 biodone(bp);
698 return;
699 }
700 lp = rs->sc_dkdev.dk_label;
701
702 /*
703 * Do bounds checking and adjust transfer. If there's an
704 * error, the bounds check will flag that for us.
705 */
706
707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
708 if (DISKPART(bp->b_dev) != RAW_PART)
709 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
710 db1_printf(("Bounds check failed!!:%d %d\n",
711 (int) bp->b_blkno, (int) wlabel));
712 biodone(bp);
713 return;
714 }
715 s = splbio();
716
717 bp->b_resid = 0;
718
719 /* stuff it onto our queue */
720 BUFQ_PUT(&rs->buf_queue, bp);
721
722 raidstart(raidPtrs[raidID]);
723
724 splx(s);
725 }
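/*
 * raidread()/raidwrite() implement the character device: physio() breaks
 * the uio up into bufs and pushes them through raidstrategy().
 */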
726 /* ARGSUSED */
727 int
728 raidread(dev, uio, flags)
729 dev_t dev;
730 struct uio *uio;
731 int flags;
732 {
733 int unit = raidunit(dev);
734 struct raid_softc *rs;
735 int part;
736
737 if (unit >= numraid)
738 return (ENXIO);
739 rs = &raid_softc[unit];
740
741 if ((rs->sc_flags & RAIDF_INITED) == 0)
742 return (ENXIO);
743 part = DISKPART(dev);
744
745 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
746
747 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
748
749 }
750 /* ARGSUSED */
751 int
752 raidwrite(dev, uio, flags)
753 dev_t dev;
754 struct uio *uio;
755 int flags;
756 {
757 int unit = raidunit(dev);
758 struct raid_softc *rs;
759
760 if (unit >= numraid)
761 return (ENXIO);
762 rs = &raid_softc[unit];
763
764 if ((rs->sc_flags & RAIDF_INITED) == 0)
765 return (ENXIO);
766 db1_printf(("raidwrite\n"));
767 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
768
769 }
770
771 int
772 raidioctl(dev, cmd, data, flag, p)
773 dev_t dev;
774 u_long cmd;
775 caddr_t data;
776 int flag;
777 struct proc *p;
778 {
779 int unit = raidunit(dev);
780 int error = 0;
781 int part, pmask;
782 struct raid_softc *rs;
783 RF_Config_t *k_cfg, *u_cfg;
784 RF_Raid_t *raidPtr;
785 RF_RaidDisk_t *diskPtr;
786 RF_AccTotals_t *totals;
787 RF_DeviceConfig_t *d_cfg, **ucfgp;
788 u_char *specific_buf;
789 int retcode = 0;
790 int row;
791 int column;
792 int raidid;
793 struct rf_recon_req *rrcopy, *rr;
794 RF_ComponentLabel_t *clabel;
795 RF_ComponentLabel_t ci_label;
796 RF_ComponentLabel_t **clabel_ptr;
797 RF_SingleComponent_t *sparePtr,*componentPtr;
798 RF_SingleComponent_t hot_spare;
799 RF_SingleComponent_t component;
800 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
801 int i, j, d;
802 #ifdef __HAVE_OLD_DISKLABEL
803 struct disklabel newlabel;
804 #endif
805
806 if (unit >= numraid)
807 return (ENXIO);
808 rs = &raid_softc[unit];
809 raidPtr = raidPtrs[unit];
810
811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
812 (int) DISKPART(dev), (int) unit, (int) cmd));
813
814 /* Must be open for writes for these commands... */
815 switch (cmd) {
816 case DIOCSDINFO:
817 case DIOCWDINFO:
818 #ifdef __HAVE_OLD_DISKLABEL
819 case ODIOCWDINFO:
820 case ODIOCSDINFO:
821 #endif
822 case DIOCWLABEL:
823 if ((flag & FWRITE) == 0)
824 return (EBADF);
825 }
826
827 /* Must be initialized for these... */
828 switch (cmd) {
829 case DIOCGDINFO:
830 case DIOCSDINFO:
831 case DIOCWDINFO:
832 #ifdef __HAVE_OLD_DISKLABEL
833 case ODIOCGDINFO:
834 case ODIOCWDINFO:
835 case ODIOCSDINFO:
836 case ODIOCGDEFLABEL:
837 #endif
838 case DIOCGPART:
839 case DIOCWLABEL:
840 case DIOCGDEFLABEL:
841 case RAIDFRAME_SHUTDOWN:
842 case RAIDFRAME_REWRITEPARITY:
843 case RAIDFRAME_GET_INFO:
844 case RAIDFRAME_RESET_ACCTOTALS:
845 case RAIDFRAME_GET_ACCTOTALS:
846 case RAIDFRAME_KEEP_ACCTOTALS:
847 case RAIDFRAME_GET_SIZE:
848 case RAIDFRAME_FAIL_DISK:
849 case RAIDFRAME_COPYBACK:
850 case RAIDFRAME_CHECK_RECON_STATUS:
851 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 case RAIDFRAME_SET_COMPONENT_LABEL:
854 case RAIDFRAME_ADD_HOT_SPARE:
855 case RAIDFRAME_REMOVE_HOT_SPARE:
856 case RAIDFRAME_INIT_LABELS:
857 case RAIDFRAME_REBUILD_IN_PLACE:
858 case RAIDFRAME_CHECK_PARITY:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
863 case RAIDFRAME_SET_AUTOCONFIG:
864 case RAIDFRAME_SET_ROOT:
865 case RAIDFRAME_DELETE_COMPONENT:
866 case RAIDFRAME_INCORPORATE_HOT_SPARE:
867 if ((rs->sc_flags & RAIDF_INITED) == 0)
868 return (ENXIO);
869 }
870
871 switch (cmd) {
872
873 /* configure the system */
874 case RAIDFRAME_CONFIGURE:
875
876 if (raidPtr->valid) {
877 /* There is a valid RAID set running on this unit! */
878 printf("raid%d: Device already configured!\n",unit);
879 return(EINVAL);
880 }
881
882 /* copy-in the configuration information */
883 /* data points to a pointer to the configuration structure */
884
885 u_cfg = *((RF_Config_t **) data);
886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
887 if (k_cfg == NULL) {
888 return (ENOMEM);
889 }
890 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
891 sizeof(RF_Config_t));
892 if (retcode) {
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
895 retcode));
896 return (retcode);
897 }
898 /* allocate a buffer for the layout-specific data, and copy it
899 * in */
900 if (k_cfg->layoutSpecificSize) {
901 if (k_cfg->layoutSpecificSize > 10000) {
902 /* sanity check */
903 RF_Free(k_cfg, sizeof(RF_Config_t));
904 return (EINVAL);
905 }
906 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
907 (u_char *));
908 if (specific_buf == NULL) {
909 RF_Free(k_cfg, sizeof(RF_Config_t));
910 return (ENOMEM);
911 }
912 retcode = copyin(k_cfg->layoutSpecific,
913 (caddr_t) specific_buf,
914 k_cfg->layoutSpecificSize);
915 if (retcode) {
916 RF_Free(k_cfg, sizeof(RF_Config_t));
917 RF_Free(specific_buf,
918 k_cfg->layoutSpecificSize);
919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
920 retcode));
921 return (retcode);
922 }
923 } else
924 specific_buf = NULL;
925 k_cfg->layoutSpecific = specific_buf;
926
927 /* should do some kind of sanity check on the configuration.
928 * Store the sum of all the bytes in the last byte? */
929
930 /* configure the system */
931
932 /*
933 * Clear the entire RAID descriptor, just to make sure
934 * there is no stale data left in the case of a
935 * reconfiguration
936 */
937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
938 raidPtr->raidid = unit;
939
940 retcode = rf_Configure(raidPtr, k_cfg, NULL);
941
942 if (retcode == 0) {
943
944 /* allow this many simultaneous IO's to
945 this RAID device */
946 raidPtr->openings = RAIDOUTSTANDING;
947
948 raidinit(raidPtr);
949 rf_markalldirty(raidPtr);
950 }
951 /* free the buffers. No return code here. */
952 if (k_cfg->layoutSpecificSize) {
953 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
954 }
955 RF_Free(k_cfg, sizeof(RF_Config_t));
956
957 return (retcode);
958
959 /* shutdown the system */
960 case RAIDFRAME_SHUTDOWN:
961
962 if ((error = raidlock(rs)) != 0)
963 return (error);
964
965 /*
966 * If somebody has a partition mounted, we shouldn't
967 * shutdown.
968 */
969
970 part = DISKPART(dev);
971 pmask = (1 << part);
972 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
973 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
974 (rs->sc_dkdev.dk_copenmask & pmask))) {
975 raidunlock(rs);
976 return (EBUSY);
977 }
978
979 retcode = rf_Shutdown(raidPtr);
980
981 /* It's no longer initialized... */
982 rs->sc_flags &= ~RAIDF_INITED;
983
984 /* Detach the disk. */
985 disk_detach(&rs->sc_dkdev);
986
987 raidunlock(rs);
988
989 return (retcode);
990 case RAIDFRAME_GET_COMPONENT_LABEL:
991 clabel_ptr = (RF_ComponentLabel_t **) data;
992 /* need to read the component label for the disk indicated
993 by row,column in clabel */
994
 995 		/* For practice, let's get it directly from disk, rather
996 than from the in-core copy */
997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
998 (RF_ComponentLabel_t *));
999 if (clabel == NULL)
1000 return (ENOMEM);
1001
1002 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1003
1004 retcode = copyin( *clabel_ptr, clabel,
1005 sizeof(RF_ComponentLabel_t));
1006
1007 if (retcode) {
1008 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1009 return(retcode);
1010 }
1011
1012 row = clabel->row;
1013 column = clabel->column;
1014
1015 if ((row < 0) || (row >= raidPtr->numRow) ||
1016 (column < 0) || (column >= raidPtr->numCol +
1017 raidPtr->numSpare)) {
1018 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1019 return(EINVAL);
1020 }
1021
1022 raidread_component_label(raidPtr->Disks[row][column].dev,
1023 raidPtr->raid_cinfo[row][column].ci_vp,
1024 clabel );
1025
1026 retcode = copyout((caddr_t) clabel,
1027 (caddr_t) *clabel_ptr,
1028 sizeof(RF_ComponentLabel_t));
1029 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1030 return (retcode);
1031
1032 case RAIDFRAME_SET_COMPONENT_LABEL:
1033 clabel = (RF_ComponentLabel_t *) data;
1034
1035 /* XXX check the label for valid stuff... */
1036 /* Note that some things *should not* get modified --
1037 the user should be re-initing the labels instead of
1038 trying to patch things.
1039 */
1040
1041 raidid = raidPtr->raidid;
1042 printf("raid%d: Got component label:\n", raidid);
1043 printf("raid%d: Version: %d\n", raidid, clabel->version);
1044 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1045 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1046 printf("raid%d: Row: %d\n", raidid, clabel->row);
1047 printf("raid%d: Column: %d\n", raidid, clabel->column);
1048 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1049 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1050 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1051 printf("raid%d: Status: %d\n", raidid, clabel->status);
1052
1053 row = clabel->row;
1054 column = clabel->column;
1055
1056 if ((row < 0) || (row >= raidPtr->numRow) ||
1057 (column < 0) || (column >= raidPtr->numCol)) {
1058 return(EINVAL);
1059 }
1060
1061 /* XXX this isn't allowed to do anything for now :-) */
1062
1063 /* XXX and before it is, we need to fill in the rest
1064 of the fields!?!?!?! */
1065 #if 0
1066 raidwrite_component_label(
1067 raidPtr->Disks[row][column].dev,
1068 raidPtr->raid_cinfo[row][column].ci_vp,
1069 clabel );
1070 #endif
1071 return (0);
1072
1073 case RAIDFRAME_INIT_LABELS:
1074 clabel = (RF_ComponentLabel_t *) data;
1075 /*
1076 we only want the serial number from
1077 the above. We get all the rest of the information
1078 from the config that was used to create this RAID
1079 set.
1080 */
1081
1082 raidPtr->serial_number = clabel->serial_number;
1083
1084 raid_init_component_label(raidPtr, &ci_label);
1085 ci_label.serial_number = clabel->serial_number;
1086
1087 for(row=0;row<raidPtr->numRow;row++) {
1088 ci_label.row = row;
1089 for(column=0;column<raidPtr->numCol;column++) {
1090 diskPtr = &raidPtr->Disks[row][column];
1091 if (!RF_DEAD_DISK(diskPtr->status)) {
1092 ci_label.partitionSize = diskPtr->partitionSize;
1093 ci_label.column = column;
1094 raidwrite_component_label(
1095 raidPtr->Disks[row][column].dev,
1096 raidPtr->raid_cinfo[row][column].ci_vp,
1097 &ci_label );
1098 }
1099 }
1100 }
1101
1102 return (retcode);
1103 case RAIDFRAME_SET_AUTOCONFIG:
1104 d = rf_set_autoconfig(raidPtr, *(int *) data);
1105 printf("raid%d: New autoconfig value is: %d\n",
1106 raidPtr->raidid, d);
1107 *(int *) data = d;
1108 return (retcode);
1109
1110 case RAIDFRAME_SET_ROOT:
1111 d = rf_set_rootpartition(raidPtr, *(int *) data);
1112 printf("raid%d: New rootpartition value is: %d\n",
1113 raidPtr->raidid, d);
1114 *(int *) data = d;
1115 return (retcode);
1116
1117 /* initialize all parity */
1118 case RAIDFRAME_REWRITEPARITY:
1119
1120 if (raidPtr->Layout.map->faultsTolerated == 0) {
1121 /* Parity for RAID 0 is trivially correct */
1122 raidPtr->parity_good = RF_RAID_CLEAN;
1123 return(0);
1124 }
1125
1126 if (raidPtr->parity_rewrite_in_progress == 1) {
1127 /* Re-write is already in progress! */
1128 return(EINVAL);
1129 }
1130
1131 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1132 rf_RewriteParityThread,
1133 raidPtr,"raid_parity");
1134 return (retcode);
1135
1136
1137 case RAIDFRAME_ADD_HOT_SPARE:
1138 sparePtr = (RF_SingleComponent_t *) data;
1139 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1140 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1141 return(retcode);
1142
1143 case RAIDFRAME_REMOVE_HOT_SPARE:
1144 return(retcode);
1145
1146 case RAIDFRAME_DELETE_COMPONENT:
1147 componentPtr = (RF_SingleComponent_t *)data;
1148 memcpy( &component, componentPtr,
1149 sizeof(RF_SingleComponent_t));
1150 retcode = rf_delete_component(raidPtr, &component);
1151 return(retcode);
1152
1153 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1154 componentPtr = (RF_SingleComponent_t *)data;
1155 memcpy( &component, componentPtr,
1156 sizeof(RF_SingleComponent_t));
1157 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1158 return(retcode);
1159
1160 case RAIDFRAME_REBUILD_IN_PLACE:
1161
1162 if (raidPtr->Layout.map->faultsTolerated == 0) {
1163 /* Can't do this on a RAID 0!! */
1164 return(EINVAL);
1165 }
1166
1167 if (raidPtr->recon_in_progress == 1) {
1168 /* a reconstruct is already in progress! */
1169 return(EINVAL);
1170 }
1171
1172 componentPtr = (RF_SingleComponent_t *) data;
1173 memcpy( &component, componentPtr,
1174 sizeof(RF_SingleComponent_t));
1175 row = component.row;
1176 column = component.column;
1177 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1178 row, column);
1179 if ((row < 0) || (row >= raidPtr->numRow) ||
1180 (column < 0) || (column >= raidPtr->numCol)) {
1181 return(EINVAL);
1182 }
1183
1184 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1185 if (rrcopy == NULL)
1186 return(ENOMEM);
1187
1188 rrcopy->raidPtr = (void *) raidPtr;
1189 rrcopy->row = row;
1190 rrcopy->col = column;
1191
1192 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1193 rf_ReconstructInPlaceThread,
1194 rrcopy,"raid_reconip");
1195 return(retcode);
1196
1197 case RAIDFRAME_GET_INFO:
1198 if (!raidPtr->valid)
1199 return (ENODEV);
1200 ucfgp = (RF_DeviceConfig_t **) data;
1201 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1202 (RF_DeviceConfig_t *));
1203 if (d_cfg == NULL)
1204 return (ENOMEM);
1205 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1206 d_cfg->rows = raidPtr->numRow;
1207 d_cfg->cols = raidPtr->numCol;
1208 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1209 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1210 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1211 return (ENOMEM);
1212 }
1213 d_cfg->nspares = raidPtr->numSpare;
1214 if (d_cfg->nspares >= RF_MAX_DISKS) {
1215 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1216 return (ENOMEM);
1217 }
1218 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
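		/* Flatten the two-dimensional disk array into d_cfg->devs;
		   spare disks live in row 0, past the data columns. */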
1219 d = 0;
1220 for (i = 0; i < d_cfg->rows; i++) {
1221 for (j = 0; j < d_cfg->cols; j++) {
1222 d_cfg->devs[d] = raidPtr->Disks[i][j];
1223 d++;
1224 }
1225 }
1226 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1227 d_cfg->spares[i] = raidPtr->Disks[0][j];
1228 }
1229 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1230 sizeof(RF_DeviceConfig_t));
1231 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1232
1233 return (retcode);
1234
1235 case RAIDFRAME_CHECK_PARITY:
1236 *(int *) data = raidPtr->parity_good;
1237 return (0);
1238
1239 case RAIDFRAME_RESET_ACCTOTALS:
1240 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1241 return (0);
1242
1243 case RAIDFRAME_GET_ACCTOTALS:
1244 totals = (RF_AccTotals_t *) data;
1245 *totals = raidPtr->acc_totals;
1246 return (0);
1247
1248 case RAIDFRAME_KEEP_ACCTOTALS:
1249 raidPtr->keep_acc_totals = *(int *)data;
1250 return (0);
1251
1252 case RAIDFRAME_GET_SIZE:
1253 *(int *) data = raidPtr->totalSectors;
1254 return (0);
1255
1256 /* fail a disk & optionally start reconstruction */
1257 case RAIDFRAME_FAIL_DISK:
1258
1259 if (raidPtr->Layout.map->faultsTolerated == 0) {
1260 /* Can't do this on a RAID 0!! */
1261 return(EINVAL);
1262 }
1263
1264 rr = (struct rf_recon_req *) data;
1265
1266 if (rr->row < 0 || rr->row >= raidPtr->numRow
1267 || rr->col < 0 || rr->col >= raidPtr->numCol)
1268 return (EINVAL);
1269
1270 printf("raid%d: Failing the disk: row: %d col: %d\n",
1271 unit, rr->row, rr->col);
1272
1273 /* make a copy of the recon request so that we don't rely on
1274 * the user's buffer */
1275 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1276 if (rrcopy == NULL)
1277 return(ENOMEM);
1278 memcpy(rrcopy, rr, sizeof(*rr));
1279 rrcopy->raidPtr = (void *) raidPtr;
1280
1281 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1282 rf_ReconThread,
1283 rrcopy,"raid_recon");
1284 return (0);
1285
1286 /* invoke a copyback operation after recon on whatever disk
1287 * needs it, if any */
1288 case RAIDFRAME_COPYBACK:
1289
1290 if (raidPtr->Layout.map->faultsTolerated == 0) {
1291 /* This makes no sense on a RAID 0!! */
1292 return(EINVAL);
1293 }
1294
1295 if (raidPtr->copyback_in_progress == 1) {
1296 /* Copyback is already in progress! */
1297 return(EINVAL);
1298 }
1299
1300 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1301 rf_CopybackThread,
1302 raidPtr,"raid_copyback");
1303 return (retcode);
1304
1305 /* return the percentage completion of reconstruction */
1306 case RAIDFRAME_CHECK_RECON_STATUS:
1307 if (raidPtr->Layout.map->faultsTolerated == 0) {
1308 /* This makes no sense on a RAID 0, so tell the
1309 user it's done. */
1310 *(int *) data = 100;
1311 return(0);
1312 }
1313 row = 0; /* XXX we only consider a single row... */
1314 if (raidPtr->status[row] != rf_rs_reconstructing)
1315 *(int *) data = 100;
1316 else
1317 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1318 return (0);
1319 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1320 progressInfoPtr = (RF_ProgressInfo_t **) data;
1321 row = 0; /* XXX we only consider a single row... */
1322 if (raidPtr->status[row] != rf_rs_reconstructing) {
1323 progressInfo.remaining = 0;
1324 progressInfo.completed = 100;
1325 progressInfo.total = 100;
1326 } else {
1327 progressInfo.total =
1328 raidPtr->reconControl[row]->numRUsTotal;
1329 progressInfo.completed =
1330 raidPtr->reconControl[row]->numRUsComplete;
1331 progressInfo.remaining = progressInfo.total -
1332 progressInfo.completed;
1333 }
1334 retcode = copyout((caddr_t) &progressInfo,
1335 (caddr_t) *progressInfoPtr,
1336 sizeof(RF_ProgressInfo_t));
1337 return (retcode);
1338
1339 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1340 if (raidPtr->Layout.map->faultsTolerated == 0) {
1341 /* This makes no sense on a RAID 0, so tell the
1342 user it's done. */
1343 *(int *) data = 100;
1344 return(0);
1345 }
1346 if (raidPtr->parity_rewrite_in_progress == 1) {
1347 *(int *) data = 100 *
1348 raidPtr->parity_rewrite_stripes_done /
1349 raidPtr->Layout.numStripe;
1350 } else {
1351 *(int *) data = 100;
1352 }
1353 return (0);
1354
1355 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1356 progressInfoPtr = (RF_ProgressInfo_t **) data;
1357 if (raidPtr->parity_rewrite_in_progress == 1) {
1358 progressInfo.total = raidPtr->Layout.numStripe;
1359 progressInfo.completed =
1360 raidPtr->parity_rewrite_stripes_done;
1361 progressInfo.remaining = progressInfo.total -
1362 progressInfo.completed;
1363 } else {
1364 progressInfo.remaining = 0;
1365 progressInfo.completed = 100;
1366 progressInfo.total = 100;
1367 }
1368 retcode = copyout((caddr_t) &progressInfo,
1369 (caddr_t) *progressInfoPtr,
1370 sizeof(RF_ProgressInfo_t));
1371 return (retcode);
1372
1373 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1374 if (raidPtr->Layout.map->faultsTolerated == 0) {
1375 /* This makes no sense on a RAID 0 */
1376 *(int *) data = 100;
1377 return(0);
1378 }
1379 if (raidPtr->copyback_in_progress == 1) {
1380 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1381 raidPtr->Layout.numStripe;
1382 } else {
1383 *(int *) data = 100;
1384 }
1385 return (0);
1386
1387 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1388 progressInfoPtr = (RF_ProgressInfo_t **) data;
1389 if (raidPtr->copyback_in_progress == 1) {
1390 progressInfo.total = raidPtr->Layout.numStripe;
1391 progressInfo.completed =
1392 raidPtr->copyback_stripes_done;
1393 progressInfo.remaining = progressInfo.total -
1394 progressInfo.completed;
1395 } else {
1396 progressInfo.remaining = 0;
1397 progressInfo.completed = 100;
1398 progressInfo.total = 100;
1399 }
1400 retcode = copyout((caddr_t) &progressInfo,
1401 (caddr_t) *progressInfoPtr,
1402 sizeof(RF_ProgressInfo_t));
1403 return (retcode);
1404
1405 /* the sparetable daemon calls this to wait for the kernel to
1406 * need a spare table. this ioctl does not return until a
1407 * spare table is needed. XXX -- calling mpsleep here in the
1408 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1409 * -- I should either compute the spare table in the kernel,
1410 * or have a different -- XXX XXX -- interface (a different
1411 * character device) for delivering the table -- XXX */
1412 #if 0
1413 case RAIDFRAME_SPARET_WAIT:
1414 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1415 while (!rf_sparet_wait_queue)
1416 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1417 waitreq = rf_sparet_wait_queue;
1418 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1419 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1420
1421 /* structure assignment */
1422 *((RF_SparetWait_t *) data) = *waitreq;
1423
1424 RF_Free(waitreq, sizeof(*waitreq));
1425 return (0);
1426
1427 /* wakes up a process waiting on SPARET_WAIT and puts an error
1428 	 * code in it that will cause the daemon to exit */
1429 case RAIDFRAME_ABORT_SPARET_WAIT:
1430 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1431 waitreq->fcol = -1;
1432 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1433 waitreq->next = rf_sparet_wait_queue;
1434 rf_sparet_wait_queue = waitreq;
1435 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1436 wakeup(&rf_sparet_wait_queue);
1437 return (0);
1438
1439 /* used by the spare table daemon to deliver a spare table
1440 * into the kernel */
1441 case RAIDFRAME_SEND_SPARET:
1442
1443 /* install the spare table */
1444 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1445
1446 /* respond to the requestor. the return status of the spare
1447 * table installation is passed in the "fcol" field */
1448 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1449 waitreq->fcol = retcode;
1450 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1451 waitreq->next = rf_sparet_resp_queue;
1452 rf_sparet_resp_queue = waitreq;
1453 wakeup(&rf_sparet_resp_queue);
1454 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1455
1456 return (retcode);
1457 #endif
1458
1459 default:
1460 break; /* fall through to the os-specific code below */
1461
1462 }
1463
1464 if (!raidPtr->valid)
1465 return (EINVAL);
1466
1467 /*
1468 * Add support for "regular" device ioctls here.
1469 */
1470
1471 switch (cmd) {
1472 case DIOCGDINFO:
1473 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1474 break;
1475 #ifdef __HAVE_OLD_DISKLABEL
1476 case ODIOCGDINFO:
1477 newlabel = *(rs->sc_dkdev.dk_label);
1478 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1479 return ENOTTY;
1480 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1481 break;
1482 #endif
1483
1484 case DIOCGPART:
1485 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1486 ((struct partinfo *) data)->part =
1487 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1488 break;
1489
1490 case DIOCWDINFO:
1491 case DIOCSDINFO:
1492 #ifdef __HAVE_OLD_DISKLABEL
1493 case ODIOCWDINFO:
1494 case ODIOCSDINFO:
1495 #endif
1496 {
1497 struct disklabel *lp;
1498 #ifdef __HAVE_OLD_DISKLABEL
1499 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1500 memset(&newlabel, 0, sizeof newlabel);
1501 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1502 lp = &newlabel;
1503 } else
1504 #endif
1505 lp = (struct disklabel *)data;
1506
1507 if ((error = raidlock(rs)) != 0)
1508 return (error);
1509
1510 rs->sc_flags |= RAIDF_LABELLING;
1511
1512 error = setdisklabel(rs->sc_dkdev.dk_label,
1513 lp, 0, rs->sc_dkdev.dk_cpulabel);
1514 if (error == 0) {
1515 if (cmd == DIOCWDINFO
1516 #ifdef __HAVE_OLD_DISKLABEL
1517 || cmd == ODIOCWDINFO
1518 #endif
1519 )
1520 error = writedisklabel(RAIDLABELDEV(dev),
1521 raidstrategy, rs->sc_dkdev.dk_label,
1522 rs->sc_dkdev.dk_cpulabel);
1523 }
1524 rs->sc_flags &= ~RAIDF_LABELLING;
1525
1526 raidunlock(rs);
1527
1528 if (error)
1529 return (error);
1530 break;
1531 }
1532
1533 case DIOCWLABEL:
1534 if (*(int *) data != 0)
1535 rs->sc_flags |= RAIDF_WLABEL;
1536 else
1537 rs->sc_flags &= ~RAIDF_WLABEL;
1538 break;
1539
1540 case DIOCGDEFLABEL:
1541 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1542 break;
1543
1544 #ifdef __HAVE_OLD_DISKLABEL
1545 case ODIOCGDEFLABEL:
1546 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1547 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1548 return ENOTTY;
1549 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1550 break;
1551 #endif
1552
1553 default:
1554 retcode = ENOTTY;
1555 }
1556 return (retcode);
1557
1558 }
1559
1560
1561 /* raidinit -- complete the rest of the initialization for the
1562 RAIDframe device. */
1563
1564
1565 static void
1566 raidinit(raidPtr)
1567 RF_Raid_t *raidPtr;
1568 {
1569 struct raid_softc *rs;
1570 int unit;
1571
1572 unit = raidPtr->raidid;
1573
1574 rs = &raid_softc[unit];
1575
1576 /* XXX should check return code first... */
1577 rs->sc_flags |= RAIDF_INITED;
1578
1579 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1580
1581 rs->sc_dkdev.dk_name = rs->sc_xname;
1582
1583 /* disk_attach actually creates space for the CPU disklabel, among
1584 * other things, so it's critical to call this *BEFORE* we try putzing
1585 * with disklabels. */
1586
1587 disk_attach(&rs->sc_dkdev);
1588
1589 /* XXX There may be a weird interaction here between this, and
1590 * protectedSectors, as used in RAIDframe. */
1591
1592 rs->sc_size = raidPtr->totalSectors;
1593
1594 }
1595
1596 /* wake up the daemon & tell it to get us a spare table
1597 * XXX
1598 * the entries in the queues should be tagged with the raidPtr
1599 * so that in the extremely rare case that two recons happen at once,
1600  * we know for which device we're requesting a spare table
1601 * XXX
1602 *
1603 * XXX This code is not currently used. GO
1604 */
1605 int
1606 rf_GetSpareTableFromDaemon(req)
1607 RF_SparetWait_t *req;
1608 {
1609 int retcode;
1610
1611 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1612 req->next = rf_sparet_wait_queue;
1613 rf_sparet_wait_queue = req;
1614 wakeup(&rf_sparet_wait_queue);
1615
1616 	/* XXX the original mpsleep unlocked the mutex; tsleep does not */
1617 while (!rf_sparet_resp_queue) {
1618 tsleep(&rf_sparet_resp_queue, PRIBIO,
1619 "raidframe getsparetable", 0);
1620 }
1621 req = rf_sparet_resp_queue;
1622 rf_sparet_resp_queue = req->next;
1623 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1624
1625 retcode = req->fcol;
1626 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1627 * alloc'd */
1628 return (retcode);
1629 }
1630
1631 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1632 * bp & passes it down.
1633 * any calls originating in the kernel must use non-blocking I/O
1634 * do some extra sanity checking to return "appropriate" error values for
1635 * certain conditions (to make some standard utilities work)
1636 *
1637 * Formerly known as: rf_DoAccessKernel
1638 */
1639 void
1640 raidstart(raidPtr)
1641 RF_Raid_t *raidPtr;
1642 {
1643 RF_SectorCount_t num_blocks, pb, sum;
1644 RF_RaidAddr_t raid_addr;
1645 int retcode;
1646 struct partition *pp;
1647 daddr_t blocknum;
1648 int unit;
1649 struct raid_softc *rs;
1650 int do_async;
1651 struct buf *bp;
1652
1653 unit = raidPtr->raidid;
1654 rs = &raid_softc[unit];
1655
1656 /* quick check to see if anything has died recently */
1657 RF_LOCK_MUTEX(raidPtr->mutex);
1658 if (raidPtr->numNewFailures > 0) {
1659 rf_update_component_labels(raidPtr,
1660 RF_NORMAL_COMPONENT_UPDATE);
1661 raidPtr->numNewFailures--;
1662 }
1663
1664 /* Check to see if we're at the limit... */
1665 while (raidPtr->openings > 0) {
1666 RF_UNLOCK_MUTEX(raidPtr->mutex);
1667
1668 /* get the next item, if any, from the queue */
1669 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1670 /* nothing more to do */
1671 return;
1672 }
1673
1674 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1675 * partition.. Need to make it absolute to the underlying
1676 * device.. */
1677
1678 blocknum = bp->b_blkno;
1679 if (DISKPART(bp->b_dev) != RAW_PART) {
1680 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1681 blocknum += pp->p_offset;
1682 }
1683
1684 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1685 (int) blocknum));
1686
1687 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1688 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1689
1690 /* *THIS* is where we adjust what block we're going to...
1691 * but DO NOT TOUCH bp->b_blkno!!! */
1692 raid_addr = blocknum;
1693
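		/* num_blocks counts whole sectors; pb is 1 if the request
		   ends in a partial sector.  The checks below catch both
		   requests past the end of the array and arithmetic wrap. */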
1694 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1695 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1696 sum = raid_addr + num_blocks + pb;
1697 if (1 || rf_debugKernelAccess) {
1698 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1699 (int) raid_addr, (int) sum, (int) num_blocks,
1700 (int) pb, (int) bp->b_resid));
1701 }
1702 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1703 || (sum < num_blocks) || (sum < pb)) {
1704 bp->b_error = ENOSPC;
1705 bp->b_flags |= B_ERROR;
1706 bp->b_resid = bp->b_bcount;
1707 biodone(bp);
1708 RF_LOCK_MUTEX(raidPtr->mutex);
1709 continue;
1710 }
1711 /*
1712 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1713 */
1714
1715 if (bp->b_bcount & raidPtr->sectorMask) {
1716 bp->b_error = EINVAL;
1717 bp->b_flags |= B_ERROR;
1718 bp->b_resid = bp->b_bcount;
1719 biodone(bp);
1720 RF_LOCK_MUTEX(raidPtr->mutex);
1721 continue;
1722
1723 }
1724 db1_printf(("Calling DoAccess..\n"));
1725
1726
1727 RF_LOCK_MUTEX(raidPtr->mutex);
1728 raidPtr->openings--;
1729 RF_UNLOCK_MUTEX(raidPtr->mutex);
1730
1731 /*
1732 * Everything is async.
1733 */
1734 do_async = 1;
1735
1736 disk_busy(&rs->sc_dkdev);
1737
1738 /* XXX we're still at splbio() here... do we *really*
1739 need to be? */
1740
1741 /* don't ever condition on bp->b_flags & B_WRITE.
1742 * always condition on B_READ instead */
1743
1744 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1745 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1746 do_async, raid_addr, num_blocks,
1747 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1748
1749 RF_LOCK_MUTEX(raidPtr->mutex);
1750 }
1751 RF_UNLOCK_MUTEX(raidPtr->mutex);
1752 }
1753
1754
1755
1756
1757 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
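/*
 * For RF_IO_TYPE_READ/WRITE a raidbuf is taken from the pool to wrap the
 * request, InitBP() fills in the component buf, and VOP_STRATEGY() hands
 * it to the underlying device; KernelWakeupFunc() runs when the component
 * I/O completes.
 */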
1758
1759 int
1760 rf_DispatchKernelIO(queue, req)
1761 RF_DiskQueue_t *queue;
1762 RF_DiskQueueData_t *req;
1763 {
1764 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1765 struct buf *bp;
1766 struct raidbuf *raidbp = NULL;
1767
1768 req->queue = queue;
1769
1770 #if DIAGNOSTIC
1771 if (queue->raidPtr->raidid >= numraid) {
1772 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, numraid);
1773 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1774 }
1775 #endif
1776
1777 bp = req->bp;
1778 #if 1
1779 /* XXX when there is a physical disk failure, someone is passing us a
1780 * buffer that contains old stuff!! Attempt to deal with this problem
1781 * without taking a performance hit... (not sure where the real bug
1782 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1783
1784 if (bp->b_flags & B_ERROR) {
1785 bp->b_flags &= ~B_ERROR;
1786 }
1787 if (bp->b_error != 0) {
1788 bp->b_error = 0;
1789 }
1790 #endif
1791 raidbp = RAIDGETBUF(rs);
1792
1793 /*
1794 * context for raidiodone
1795 */
1796 raidbp->rf_obp = bp;
1797 raidbp->req = req;
1798
1799 LIST_INIT(&raidbp->rf_buf.b_dep);
1800
1801 switch (req->type) {
1802 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1803 /* XXX need to do something extra here.. */
1804 /* I'm leaving this in, as I've never actually seen it used,
1805 * and I'd like folks to report it... GO */
1806 		printf("WAKEUP CALLED\n");
1807 queue->numOutstanding++;
1808
1809 /* XXX need to glue the original buffer into this?? */
1810
1811 KernelWakeupFunc(&raidbp->rf_buf);
1812 break;
1813
1814 case RF_IO_TYPE_READ:
1815 case RF_IO_TYPE_WRITE:
1816
1817 if (req->tracerec) {
1818 RF_ETIMER_START(req->tracerec->timer);
1819 }
1820 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1821 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1822 req->sectorOffset, req->numSector,
1823 req->buf, KernelWakeupFunc, (void *) req,
1824 queue->raidPtr->logBytesPerSector, req->b_proc);
1825
1826 if (rf_debugKernelAccess) {
1827 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1828 (long) bp->b_blkno));
1829 }
1830 queue->numOutstanding++;
1831 queue->last_deq_sector = req->sectorOffset;
1832 /* acc wouldn't have been let in if there were any pending
1833 * reqs at any other priority */
1834 queue->curPriority = req->priority;
1835
1836 db1_printf(("Going for %c to unit %d row %d col %d\n",
1837 req->type, queue->raidPtr->raidid,
1838 queue->row, queue->col));
1839 db1_printf(("sector %d count %d (%d bytes) %d\n",
1840 (int) req->sectorOffset, (int) req->numSector,
1841 (int) (req->numSector <<
1842 queue->raidPtr->logBytesPerSector),
1843 (int) queue->raidPtr->logBytesPerSector));
1844 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1845 raidbp->rf_buf.b_vp->v_numoutput++;
1846 }
1847 VOP_STRATEGY(&raidbp->rf_buf);
1848
1849 break;
1850
1851 default:
1852 panic("bad req->type in rf_DispatchKernelIO");
1853 }
1854 db1_printf(("Exiting from DispatchKernelIO\n"));
1855
1856 return (0);
1857 }
1858 /* this is the callback function associated with an I/O invoked from
1859 kernel code.
1860 */
1861 static void
1862 KernelWakeupFunc(vbp)
1863 struct buf *vbp;
1864 {
1865 RF_DiskQueueData_t *req = NULL;
1866 RF_DiskQueue_t *queue;
1867 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1868 struct buf *bp;
1869 struct raid_softc *rs;
1870 int unit;
1871 int s;
1872
1873 s = splbio();
1874 db1_printf(("recovering the request queue:\n"));
1875 req = raidbp->req;
1876
1877 bp = raidbp->rf_obp;
1878
1879 queue = (RF_DiskQueue_t *) req->queue;
1880
1881 if (raidbp->rf_buf.b_flags & B_ERROR) {
1882 bp->b_flags |= B_ERROR;
1883 bp->b_error = raidbp->rf_buf.b_error ?
1884 raidbp->rf_buf.b_error : EIO;
1885 }
1886
1887 /* XXX methinks this could be wrong... */
1888 #if 1
1889 bp->b_resid = raidbp->rf_buf.b_resid;
1890 #endif
1891
1892 if (req->tracerec) {
1893 RF_ETIMER_STOP(req->tracerec->timer);
1894 RF_ETIMER_EVAL(req->tracerec->timer);
1895 RF_LOCK_MUTEX(rf_tracing_mutex);
1896 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1897 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1898 req->tracerec->num_phys_ios++;
1899 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1900 }
1901 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1902
1903 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1904
1905
1906 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1907 * ballistic, and mark the component as hosed... */
1908
1909 if (bp->b_flags & B_ERROR) {
1910 /* Mark the disk as dead */
1911 /* but only mark it once... */
1912 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1913 rf_ds_optimal) {
1914 printf("raid%d: IO Error. Marking %s as failed.\n",
1915 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1916 queue->raidPtr->Disks[queue->row][queue->col].status =
1917 rf_ds_failed;
1918 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1919 queue->raidPtr->numFailures++;
1920 queue->raidPtr->numNewFailures++;
1921 } else { /* Disk is already dead... */
1922 /* printf("Disk already marked as dead!\n"); */
1923 }
1924
1925 }
1926
1927 rs = &raid_softc[unit];
1928 RAIDPUTBUF(rs, raidbp);
1929
1930 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1931 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1932
1933 splx(s);
1934 }
1935
1936
1937
1938 /*
1939 * initialize a buf structure for doing an I/O in the kernel.
1940 */
1941 static void
1942 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1943 logBytesPerSector, b_proc)
1944 struct buf *bp;
1945 struct vnode *b_vp;
1946 unsigned rw_flag;
1947 dev_t dev;
1948 RF_SectorNum_t startSect;
1949 RF_SectorCount_t numSect;
1950 caddr_t buf;
1951 void (*cbFunc) (struct buf *);
1952 void *cbArg;
1953 int logBytesPerSector;
1954 struct proc *b_proc;
1955 {
1956 /* bp->b_flags = B_PHYS | rw_flag; */
1957 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1958 bp->b_bcount = numSect << logBytesPerSector;
1959 bp->b_bufsize = bp->b_bcount;
1960 bp->b_error = 0;
1961 bp->b_dev = dev;
1962 bp->b_data = buf;
1963 bp->b_blkno = startSect;
1964 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1965 if (bp->b_bcount == 0) {
1966 panic("bp->b_bcount is zero in InitBP!!\n");
1967 }
1968 bp->b_proc = b_proc;
1969 bp->b_iodone = cbFunc;
1970 bp->b_vp = b_vp;
1971
1972 }
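/*
 * Illustrative sketch (not part of the driver): the b_bcount set up in
 * InitBP() above is just the sector count shifted left by
 * logBytesPerSector.  With the usual 512-byte sectors logBytesPerSector
 * is 9, so a 16-sector request works out to 16 << 9 == 8192 bytes.
 * The values below are hypothetical, for illustration only.
 */
#if 0
	RF_SectorCount_t numSect = 16;			/* hypothetical */
	int logBytesPerSector = 9;			/* 512-byte sectors */
	long bcount = numSect << logBytesPerSector;	/* == 8192 bytes */
#endif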
1973
1974 static void
1975 raidgetdefaultlabel(raidPtr, rs, lp)
1976 RF_Raid_t *raidPtr;
1977 struct raid_softc *rs;
1978 struct disklabel *lp;
1979 {
1980 db1_printf(("Building a default label...\n"));
1981 memset(lp, 0, sizeof(*lp));
1982
1983 /* fabricate a label... */
1984 lp->d_secperunit = raidPtr->totalSectors;
1985 lp->d_secsize = raidPtr->bytesPerSector;
1986 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1987 lp->d_ntracks = 4 * raidPtr->numCol;
1988 lp->d_ncylinders = raidPtr->totalSectors /
1989 (lp->d_nsectors * lp->d_ntracks);
1990 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1991
1992 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1993 lp->d_type = DTYPE_RAID;
1994 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1995 lp->d_rpm = 3600;
1996 lp->d_interleave = 1;
1997 lp->d_flags = 0;
1998
1999 lp->d_partitions[RAW_PART].p_offset = 0;
2000 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2001 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2002 lp->d_npartitions = RAW_PART + 1;
2003
2004 lp->d_magic = DISKMAGIC;
2005 lp->d_magic2 = DISKMAGIC;
2006 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2007
2008 }
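/*
 * Illustrative sketch (not part of the driver): the geometry fabricated
 * above is purely synthetic.  For a hypothetical 3-column set with
 * 1000000 total sectors and 63 data sectors per stripe, the arithmetic
 * comes out as shown below (integer division throughout).
 */
#if 0
	u_int32_t nsectors = 63;			/* dataSectorsPerStripe */
	u_int32_t ntracks = 4 * 3;			/* 4 * numCol == 12 */
	u_int32_t secpercyl = nsectors * ntracks;	/* 756 */
	u_int32_t ncylinders = 1000000 / secpercyl;	/* 1322 */
#endif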
2009 /*
2010 * Read the disklabel from the raid device. If one is not present, fake one
2011 * up.
2012 */
2013 static void
2014 raidgetdisklabel(dev)
2015 dev_t dev;
2016 {
2017 int unit = raidunit(dev);
2018 struct raid_softc *rs = &raid_softc[unit];
2019 char *errstring;
2020 struct disklabel *lp = rs->sc_dkdev.dk_label;
2021 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2022 RF_Raid_t *raidPtr;
2023
2024 db1_printf(("Getting the disklabel...\n"));
2025
2026 memset(clp, 0, sizeof(*clp));
2027
2028 raidPtr = raidPtrs[unit];
2029
2030 raidgetdefaultlabel(raidPtr, rs, lp);
2031
2032 /*
2033 * Call the generic disklabel extraction routine.
2034 */
2035 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2036 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2037 if (errstring)
2038 raidmakedisklabel(rs);
2039 else {
2040 int i;
2041 struct partition *pp;
2042
2043 /*
2044 * Sanity check whether the found disklabel is valid.
2045 *
2046 		 * This is necessary since the total size of the raid device
2047 		 * may vary when the interleave is changed even though exactly
2048 		 * the same components are used, and an old disklabel may be
2049 		 * used if one is found.
2050 */
2051 if (lp->d_secperunit != rs->sc_size)
2052 printf("raid%d: WARNING: %s: "
2053 "total sector size in disklabel (%d) != "
2054 "the size of raid (%ld)\n", unit, rs->sc_xname,
2055 lp->d_secperunit, (long) rs->sc_size);
2056 for (i = 0; i < lp->d_npartitions; i++) {
2057 pp = &lp->d_partitions[i];
2058 if (pp->p_offset + pp->p_size > rs->sc_size)
2059 printf("raid%d: WARNING: %s: end of partition `%c' "
2060 "exceeds the size of raid (%ld)\n",
2061 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2062 }
2063 }
2064
2065 }
2066 /*
2067 * Take care of things one might want to take care of in the event
2068 * that a disklabel isn't present.
2069 */
2070 static void
2071 raidmakedisklabel(rs)
2072 struct raid_softc *rs;
2073 {
2074 struct disklabel *lp = rs->sc_dkdev.dk_label;
2075 db1_printf(("Making a label..\n"));
2076
2077 /*
2078 * For historical reasons, if there's no disklabel present
2079 * the raw partition must be marked FS_BSDFFS.
2080 */
2081
2082 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2083
2084 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2085
2086 lp->d_checksum = dkcksum(lp);
2087 }
2088 /*
2089 * Lookup the provided name in the filesystem. If the file exists,
2090 * is a valid block device, and isn't being used by anyone else,
2091 * set *vpp to the file's vnode.
2092 * You'll find the original of this in ccd.c
2093 */
2094 int
2095 raidlookup(path, p, vpp)
2096 char *path;
2097 struct proc *p;
2098 struct vnode **vpp; /* result */
2099 {
2100 struct nameidata nd;
2101 struct vnode *vp;
2102 struct vattr va;
2103 int error;
2104
2105 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2106 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2107 #if 0
2108 printf("RAIDframe: vn_open returned %d\n", error);
2109 #endif
2110 return (error);
2111 }
2112 vp = nd.ni_vp;
2113 if (vp->v_usecount > 1) {
2114 VOP_UNLOCK(vp, 0);
2115 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2116 return (EBUSY);
2117 }
2118 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2119 VOP_UNLOCK(vp, 0);
2120 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2121 return (error);
2122 }
2123 /* XXX: eventually we should handle VREG, too. */
2124 if (va.va_type != VBLK) {
2125 VOP_UNLOCK(vp, 0);
2126 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2127 return (ENOTBLK);
2128 }
2129 VOP_UNLOCK(vp, 0);
2130 *vpp = vp;
2131 return (0);
2132 }
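/*
 * Illustrative sketch (not part of the driver): a hypothetical caller of
 * raidlookup() above, turning a component pathname into a vnode.  The
 * pathname and the error handling here are made up for illustration;
 * curproc stands in for the calling process.
 */
#if 0
	struct vnode *vp;
	int error;

	error = raidlookup("/dev/sd0e", curproc, &vp);
	if (error) {
		printf("raidframe: can't open component (error %d)\n", error);
	} else {
		/* ... use vp, then vn_close() it when finished ... */
	}
#endif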
2133 /*
2134 * Wait interruptibly for an exclusive lock.
2135 *
2136 * XXX
2137 * Several drivers do this; it should be abstracted and made MP-safe.
2138 * (Hmm... where have we seen this warning before :-> GO )
2139 */
2140 static int
2141 raidlock(rs)
2142 struct raid_softc *rs;
2143 {
2144 int error;
2145
2146 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2147 rs->sc_flags |= RAIDF_WANTED;
2148 if ((error =
2149 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2150 return (error);
2151 }
2152 rs->sc_flags |= RAIDF_LOCKED;
2153 return (0);
2154 }
2155 /*
2156 * Unlock and wake up any waiters.
2157 */
2158 static void
2159 raidunlock(rs)
2160 struct raid_softc *rs;
2161 {
2162
2163 rs->sc_flags &= ~RAIDF_LOCKED;
2164 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2165 rs->sc_flags &= ~RAIDF_WANTED;
2166 wakeup(rs);
2167 }
2168 }
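/*
 * Illustrative sketch (not part of the driver): the usual bracketing a
 * caller does around the softc with raidlock()/raidunlock().  The work
 * done while the lock is held is hypothetical.
 */
#if 0
	int error;

	if ((error = raidlock(rs)) != 0)
		return (error);
	/* ... examine or modify rs->sc_flags, rs->sc_dkdev, ... */
	raidunlock(rs);
#endif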
2169
2170
2171 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2172 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2173
2174 int
2175 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2176 {
2177 RF_ComponentLabel_t clabel;
2178 raidread_component_label(dev, b_vp, &clabel);
2179 clabel.mod_counter = mod_counter;
2180 clabel.clean = RF_RAID_CLEAN;
2181 raidwrite_component_label(dev, b_vp, &clabel);
2182 return(0);
2183 }
2184
2185
2186 int
2187 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2188 {
2189 RF_ComponentLabel_t clabel;
2190 raidread_component_label(dev, b_vp, &clabel);
2191 clabel.mod_counter = mod_counter;
2192 clabel.clean = RF_RAID_DIRTY;
2193 raidwrite_component_label(dev, b_vp, &clabel);
2194 return(0);
2195 }
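/*
 * Illustrative sketch (not part of the driver): how these helpers are
 * typically used to bracket activity on a component -- mark the label
 * dirty before the array starts taking writes, and mark it clean again
 * on an orderly shutdown or after a successful parity rewrite.  "dev",
 * "vp" and "mod_counter" are hypothetical here.
 */
#if 0
	raidmarkdirty(dev, vp, mod_counter);	/* before I/O starts */
	/* ... array in service; parity may go stale if we crash here ... */
	raidmarkclean(dev, vp, mod_counter);	/* on clean shutdown */
#endif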
2196
2197 /* ARGSUSED */
2198 int
2199 raidread_component_label(dev, b_vp, clabel)
2200 dev_t dev;
2201 struct vnode *b_vp;
2202 RF_ComponentLabel_t *clabel;
2203 {
2204 struct buf *bp;
2205 const struct bdevsw *bdev;
2206 int error;
2207
2208 /* XXX should probably ensure that we don't try to do this if
2209 someone has changed rf_protected_sectors. */
2210
2211 if (b_vp == NULL) {
2212 /* For whatever reason, this component is not valid.
2213 Don't try to read a component label from it. */
2214 return(EINVAL);
2215 }
2216
2217 /* get a block of the appropriate size... */
2218 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2219 bp->b_dev = dev;
2220
2221 /* get our ducks in a row for the read */
2222 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2223 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2224 bp->b_flags |= B_READ;
2225 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2226
2227 bdev = bdevsw_lookup(bp->b_dev);
2228 	if (bdev == NULL) {
		brelse(bp);	/* don't leak the buffer on this error path */
2229 		return (ENXIO);
	}
2230 (*bdev->d_strategy)(bp);
2231
2232 error = biowait(bp);
2233
2234 if (!error) {
2235 memcpy(clabel, bp->b_data,
2236 sizeof(RF_ComponentLabel_t));
2237 #if 0
2238 rf_print_component_label( clabel );
2239 #endif
2240 } else {
2241 #if 0
2242 printf("Failed to read RAID component label!\n");
2243 #endif
2244 }
2245
2246 brelse(bp);
2247 return(error);
2248 }
2249 /* ARGSUSED */
2250 int
2251 raidwrite_component_label(dev, b_vp, clabel)
2252 dev_t dev;
2253 struct vnode *b_vp;
2254 RF_ComponentLabel_t *clabel;
2255 {
2256 struct buf *bp;
2257 const struct bdevsw *bdev;
2258 int error;
2259
2260 /* get a block of the appropriate size... */
2261 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2262 bp->b_dev = dev;
2263
2264 /* get our ducks in a row for the write */
2265 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2266 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2267 bp->b_flags |= B_WRITE;
2268 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2269
2270 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2271
2272 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2273
2274 bdev = bdevsw_lookup(bp->b_dev);
2275 	if (bdev == NULL) {
		brelse(bp);	/* don't leak the buffer on this error path */
2276 		return (ENXIO);
	}
2277 (*bdev->d_strategy)(bp);
2278 error = biowait(bp);
2279 brelse(bp);
2280 if (error) {
2281 #if 1
2282 printf("Failed to write RAID component info!\n");
2283 #endif
2284 }
2285
2286 return(error);
2287 }
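/*
 * Illustrative note (not part of the driver): with the constants above,
 * the component label lives RF_COMPONENT_INFO_OFFSET == 16384 bytes into
 * the component, i.e. block 16384 / DEV_BSIZE == 32 for the usual
 * 512-byte DEV_BSIZE, and occupies one RF_COMPONENT_INFO_SIZE == 1024
 * byte block.  The values below restate that arithmetic.
 */
#if 0
	daddr_t label_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;	/* 32 */
	int label_bytes = RF_COMPONENT_INFO_SIZE;			/* 1024 */
#endif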
2288
2289 void
2290 rf_markalldirty(raidPtr)
2291 RF_Raid_t *raidPtr;
2292 {
2293 RF_ComponentLabel_t clabel;
2294 int r,c;
2295
2296 raidPtr->mod_counter++;
2297 for (r = 0; r < raidPtr->numRow; r++) {
2298 for (c = 0; c < raidPtr->numCol; c++) {
2299 /* we don't want to touch (at all) a disk that has
2300 failed */
2301 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2302 raidread_component_label(
2303 raidPtr->Disks[r][c].dev,
2304 raidPtr->raid_cinfo[r][c].ci_vp,
2305 &clabel);
2306 if (clabel.status == rf_ds_spared) {
2307 /* XXX do something special...
2308 but whatever you do, don't
2309 try to access it!! */
2310 } else {
2311 #if 0
2312 clabel.status =
2313 raidPtr->Disks[r][c].status;
2314 raidwrite_component_label(
2315 raidPtr->Disks[r][c].dev,
2316 raidPtr->raid_cinfo[r][c].ci_vp,
2317 &clabel);
2318 #endif
2319 raidmarkdirty(
2320 raidPtr->Disks[r][c].dev,
2321 raidPtr->raid_cinfo[r][c].ci_vp,
2322 raidPtr->mod_counter);
2323 }
2324 }
2325 }
2326 }
2327 /* printf("Component labels marked dirty.\n"); */
2328 #if 0
2329 for( c = 0; c < raidPtr->numSpare ; c++) {
2330 sparecol = raidPtr->numCol + c;
2331 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2332 /*
2333
2334 XXX this is where we get fancy and map this spare
2335 		   into its correct spot in the array.
2336
2337 */
2338 /*
2339
2340 we claim this disk is "optimal" if it's
2341 rf_ds_used_spare, as that means it should be
2342 directly substitutable for the disk it replaced.
2343 We note that too...
2344
2345 */
2346
2347 for(i=0;i<raidPtr->numRow;i++) {
2348 for(j=0;j<raidPtr->numCol;j++) {
2349 if ((raidPtr->Disks[i][j].spareRow ==
2350 r) &&
2351 (raidPtr->Disks[i][j].spareCol ==
2352 sparecol)) {
2353 srow = r;
2354 scol = sparecol;
2355 break;
2356 }
2357 }
2358 }
2359
2360 raidread_component_label(
2361 raidPtr->Disks[r][sparecol].dev,
2362 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2363 &clabel);
2364 /* make sure status is noted */
2365 clabel.version = RF_COMPONENT_LABEL_VERSION;
2366 clabel.mod_counter = raidPtr->mod_counter;
2367 clabel.serial_number = raidPtr->serial_number;
2368 clabel.row = srow;
2369 clabel.column = scol;
2370 clabel.num_rows = raidPtr->numRow;
2371 clabel.num_columns = raidPtr->numCol;
2372 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2373 clabel.status = rf_ds_optimal;
2374 raidwrite_component_label(
2375 raidPtr->Disks[r][sparecol].dev,
2376 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2377 &clabel);
2378 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2379 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2380 }
2381 }
2382
2383 #endif
2384 }
2385
2386
2387 void
2388 rf_update_component_labels(raidPtr, final)
2389 RF_Raid_t *raidPtr;
2390 int final;
2391 {
2392 RF_ComponentLabel_t clabel;
2393 int sparecol;
2394 int r,c;
2395 int i,j;
2396 int srow, scol;
2397
2398 srow = -1;
2399 scol = -1;
2400
2401 /* XXX should do extra checks to make sure things really are clean,
2402 rather than blindly setting the clean bit... */
2403
2404 raidPtr->mod_counter++;
2405
2406 for (r = 0; r < raidPtr->numRow; r++) {
2407 for (c = 0; c < raidPtr->numCol; c++) {
2408 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2409 raidread_component_label(
2410 raidPtr->Disks[r][c].dev,
2411 raidPtr->raid_cinfo[r][c].ci_vp,
2412 &clabel);
2413 /* make sure status is noted */
2414 clabel.status = rf_ds_optimal;
2415 /* bump the counter */
2416 clabel.mod_counter = raidPtr->mod_counter;
2417
2418 raidwrite_component_label(
2419 raidPtr->Disks[r][c].dev,
2420 raidPtr->raid_cinfo[r][c].ci_vp,
2421 &clabel);
2422 if (final == RF_FINAL_COMPONENT_UPDATE) {
2423 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2424 raidmarkclean(
2425 raidPtr->Disks[r][c].dev,
2426 raidPtr->raid_cinfo[r][c].ci_vp,
2427 raidPtr->mod_counter);
2428 }
2429 }
2430 }
2431 /* else we don't touch it.. */
2432 }
2433 }
2434
2435 for( c = 0; c < raidPtr->numSpare ; c++) {
2436 sparecol = raidPtr->numCol + c;
2437 /* Need to ensure that the reconstruct actually completed! */
2438 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2439 /*
2440
2441 we claim this disk is "optimal" if it's
2442 rf_ds_used_spare, as that means it should be
2443 directly substitutable for the disk it replaced.
2444 We note that too...
2445
2446 */
2447
2448 for(i=0;i<raidPtr->numRow;i++) {
2449 for(j=0;j<raidPtr->numCol;j++) {
2450 if ((raidPtr->Disks[i][j].spareRow ==
2451 0) &&
2452 (raidPtr->Disks[i][j].spareCol ==
2453 sparecol)) {
2454 srow = i;
2455 scol = j;
2456 break;
2457 }
2458 }
2459 }
2460
2461 /* XXX shouldn't *really* need this... */
2462 raidread_component_label(
2463 raidPtr->Disks[0][sparecol].dev,
2464 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2465 &clabel);
2466 /* make sure status is noted */
2467
2468 raid_init_component_label(raidPtr, &clabel);
2469
2470 clabel.mod_counter = raidPtr->mod_counter;
2471 clabel.row = srow;
2472 clabel.column = scol;
2473 clabel.status = rf_ds_optimal;
2474
2475 raidwrite_component_label(
2476 raidPtr->Disks[0][sparecol].dev,
2477 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2478 &clabel);
2479 if (final == RF_FINAL_COMPONENT_UPDATE) {
2480 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2481 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2482 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2483 raidPtr->mod_counter);
2484 }
2485 }
2486 }
2487 }
2488 /* printf("Component labels updated\n"); */
2489 }
2490
2491 void
2492 rf_close_component(raidPtr, vp, auto_configured)
2493 RF_Raid_t *raidPtr;
2494 struct vnode *vp;
2495 int auto_configured;
2496 {
2497 struct proc *p;
2498
2499 p = raidPtr->engine_thread;
2500
2501 if (vp != NULL) {
2502 if (auto_configured == 1) {
2503 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2504 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2505 vput(vp);
2506
2507 } else {
2508 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2509 }
2510 } else {
2511 #if 0
2512 printf("vnode was NULL\n");
2513 #endif
2514 }
2515 }
2516
2517
2518 void
2519 rf_UnconfigureVnodes(raidPtr)
2520 RF_Raid_t *raidPtr;
2521 {
2522 int r,c;
2523 struct proc *p;
2524 struct vnode *vp;
2525 int acd;
2526
2527
2528 /* We take this opportunity to close the vnodes like we should.. */
2529
2530 p = raidPtr->engine_thread;
2531
2532 for (r = 0; r < raidPtr->numRow; r++) {
2533 for (c = 0; c < raidPtr->numCol; c++) {
2534 #if 0
2535 printf("raid%d: Closing vnode for row: %d col: %d\n",
2536 raidPtr->raidid, r, c);
2537 #endif
2538 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2539 acd = raidPtr->Disks[r][c].auto_configured;
2540 rf_close_component(raidPtr, vp, acd);
2541 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2542 raidPtr->Disks[r][c].auto_configured = 0;
2543 }
2544 }
2545 for (r = 0; r < raidPtr->numSpare; r++) {
2546 #if 0
2547 printf("raid%d: Closing vnode for spare: %d\n",
2548 raidPtr->raidid, r);
2549 #endif
2550 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2551 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2552 rf_close_component(raidPtr, vp, acd);
2553 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2554 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2555 }
2556 }
2557
2558
2559 void
2560 rf_ReconThread(req)
2561 struct rf_recon_req *req;
2562 {
2563 int s;
2564 RF_Raid_t *raidPtr;
2565
2566 s = splbio();
2567 raidPtr = (RF_Raid_t *) req->raidPtr;
2568 raidPtr->recon_in_progress = 1;
2569
2570 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2571 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2572
2573 /* XXX get rid of this! we don't need it at all.. */
2574 RF_Free(req, sizeof(*req));
2575
2576 raidPtr->recon_in_progress = 0;
2577 splx(s);
2578
2579 /* That's all... */
2580 kthread_exit(0); /* does not return */
2581 }
2582
2583 void
2584 rf_RewriteParityThread(raidPtr)
2585 RF_Raid_t *raidPtr;
2586 {
2587 int retcode;
2588 int s;
2589
2590 raidPtr->parity_rewrite_in_progress = 1;
2591 s = splbio();
2592 retcode = rf_RewriteParity(raidPtr);
2593 splx(s);
2594 if (retcode) {
2595 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2596 } else {
2597 /* set the clean bit! If we shutdown correctly,
2598 the clean bit on each component label will get
2599 set */
2600 raidPtr->parity_good = RF_RAID_CLEAN;
2601 }
2602 raidPtr->parity_rewrite_in_progress = 0;
2603
2604 /* Anyone waiting for us to stop? If so, inform them... */
2605 if (raidPtr->waitShutdown) {
2606 wakeup(&raidPtr->parity_rewrite_in_progress);
2607 }
2608
2609 /* That's all... */
2610 kthread_exit(0); /* does not return */
2611 }
2612
2613
2614 void
2615 rf_CopybackThread(raidPtr)
2616 RF_Raid_t *raidPtr;
2617 {
2618 int s;
2619
2620 raidPtr->copyback_in_progress = 1;
2621 s = splbio();
2622 rf_CopybackReconstructedData(raidPtr);
2623 splx(s);
2624 raidPtr->copyback_in_progress = 0;
2625
2626 /* That's all... */
2627 kthread_exit(0); /* does not return */
2628 }
2629
2630
2631 void
2632 rf_ReconstructInPlaceThread(req)
2633 struct rf_recon_req *req;
2634 {
2635 int retcode;
2636 int s;
2637 RF_Raid_t *raidPtr;
2638
2639 s = splbio();
2640 raidPtr = req->raidPtr;
2641 raidPtr->recon_in_progress = 1;
2642 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2643 RF_Free(req, sizeof(*req));
2644 raidPtr->recon_in_progress = 0;
2645 splx(s);
2646
2647 /* That's all... */
2648 kthread_exit(0); /* does not return */
2649 }
2650
2651 RF_AutoConfig_t *
2652 rf_find_raid_components()
2653 {
2654 struct vnode *vp;
2655 struct disklabel label;
2656 struct device *dv;
2657 dev_t dev;
2658 int bmajor;
2659 int error;
2660 int i;
2661 int good_one;
2662 RF_ComponentLabel_t *clabel;
2663 RF_AutoConfig_t *ac_list;
2664 RF_AutoConfig_t *ac;
2665
2666
2667 /* initialize the AutoConfig list */
2668 ac_list = NULL;
2669
2670 /* we begin by trolling through *all* the devices on the system */
2671
2672 for (dv = alldevs.tqh_first; dv != NULL;
2673 dv = dv->dv_list.tqe_next) {
2674
2675 /* we are only interested in disks... */
2676 if (dv->dv_class != DV_DISK)
2677 continue;
2678
2679 /* we don't care about floppies... */
2680 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2681 continue;
2682 }
2683
2684 /* we don't care about CD's... */
2685 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2686 continue;
2687 }
2688
2689 /* hdfd is the Atari/Hades floppy driver */
2690 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2691 continue;
2692 }
2693 /* fdisa is the Atari/Milan floppy driver */
2694 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2695 continue;
2696 }
2697
2698 /* need to find the device_name_to_block_device_major stuff */
2699 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2700
2701 /* get a vnode for the raw partition of this disk */
2702
2703 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2704 if (bdevvp(dev, &vp))
2705 panic("RAID can't alloc vnode");
2706
2707 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2708
2709 if (error) {
2710 /* "Who cares." Continue looking
2711 for something that exists*/
2712 vput(vp);
2713 continue;
2714 }
2715
2716 /* Ok, the disk exists. Go get the disklabel. */
2717 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2718 FREAD, NOCRED, 0);
2719 if (error) {
2720 /*
2721 * XXX can't happen - open() would
2722 * have errored out (or faked up one)
2723 */
2724 printf("can't get label for dev %s%c (%d)!?!?\n",
2725 dv->dv_xname, 'a' + RAW_PART, error);
2726 }
2727
2728 /* don't need this any more. We'll allocate it again
2729 a little later if we really do... */
2730 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2731 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2732 vput(vp);
2733
2734 for (i=0; i < label.d_npartitions; i++) {
2735 /* We only support partitions marked as RAID */
2736 if (label.d_partitions[i].p_fstype != FS_RAID)
2737 continue;
2738
2739 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2740 if (bdevvp(dev, &vp))
2741 panic("RAID can't alloc vnode");
2742
2743 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2744 if (error) {
2745 /* Whatever... */
2746 vput(vp);
2747 continue;
2748 }
2749
2750 good_one = 0;
2751
2752 clabel = (RF_ComponentLabel_t *)
2753 malloc(sizeof(RF_ComponentLabel_t),
2754 M_RAIDFRAME, M_NOWAIT);
2755 if (clabel == NULL) {
2756 /* XXX CLEANUP HERE */
2757 printf("RAID auto config: out of memory!\n");
2758 return(NULL); /* XXX probably should panic? */
2759 }
2760
2761 if (!raidread_component_label(dev, vp, clabel)) {
2762 /* Got the label. Does it look reasonable? */
2763 if (rf_reasonable_label(clabel) &&
2764 (clabel->partitionSize <=
2765 label.d_partitions[i].p_size)) {
2766 #if DEBUG
2767 printf("Component on: %s%c: %d\n",
2768 dv->dv_xname, 'a'+i,
2769 label.d_partitions[i].p_size);
2770 rf_print_component_label(clabel);
2771 #endif
2772 /* if it's reasonable, add it,
2773 else ignore it. */
2774 ac = (RF_AutoConfig_t *)
2775 malloc(sizeof(RF_AutoConfig_t),
2776 M_RAIDFRAME,
2777 M_NOWAIT);
2778 if (ac == NULL) {
2779 /* XXX should panic?? */
2780 return(NULL);
2781 }
2782
2783 sprintf(ac->devname, "%s%c",
2784 dv->dv_xname, 'a'+i);
2785 ac->dev = dev;
2786 ac->vp = vp;
2787 ac->clabel = clabel;
2788 ac->next = ac_list;
2789 ac_list = ac;
2790 good_one = 1;
2791 }
2792 }
2793 if (!good_one) {
2794 /* cleanup */
2795 free(clabel, M_RAIDFRAME);
2796 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2797 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2798 vput(vp);
2799 }
2800 }
2801 }
2802 return(ac_list);
2803 }
2804
2805 static int
2806 rf_reasonable_label(clabel)
2807 RF_ComponentLabel_t *clabel;
2808 {
2809
2810 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2811 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2812 ((clabel->clean == RF_RAID_CLEAN) ||
2813 (clabel->clean == RF_RAID_DIRTY)) &&
2814 clabel->row >=0 &&
2815 clabel->column >= 0 &&
2816 clabel->num_rows > 0 &&
2817 clabel->num_columns > 0 &&
2818 clabel->row < clabel->num_rows &&
2819 clabel->column < clabel->num_columns &&
2820 clabel->blockSize > 0 &&
2821 clabel->numBlocks > 0) {
2822 /* label looks reasonable enough... */
2823 return(1);
2824 }
2825 return(0);
2826 }
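/*
 * Illustrative sketch (not part of the driver): a minimal label that
 * rf_reasonable_label() above would accept.  All of the values here are
 * hypothetical, for illustration only.
 */
#if 0
	RF_ComponentLabel_t cl;

	memset(&cl, 0, sizeof(cl));
	cl.version = RF_COMPONENT_LABEL_VERSION;
	cl.clean = RF_RAID_CLEAN;
	cl.row = 0;
	cl.column = 1;
	cl.num_rows = 1;
	cl.num_columns = 3;		/* 0 <= column < num_columns */
	cl.blockSize = 512;
	cl.numBlocks = 1000000;
	/* rf_reasonable_label(&cl) returns 1 for these values */
#endif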
2827
2828
2829 #if 0
2830 void
2831 rf_print_component_label(clabel)
2832 RF_ComponentLabel_t *clabel;
2833 {
2834 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2835 clabel->row, clabel->column,
2836 clabel->num_rows, clabel->num_columns);
2837 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2838 clabel->version, clabel->serial_number,
2839 clabel->mod_counter);
2840 printf(" Clean: %s Status: %d\n",
2841 clabel->clean ? "Yes" : "No", clabel->status );
2842 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2843 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2844 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2845 (char) clabel->parityConfig, clabel->blockSize,
2846 clabel->numBlocks);
2847 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2848 printf(" Contains root partition: %s\n",
2849 clabel->root_partition ? "Yes" : "No" );
2850 printf(" Last configured as: raid%d\n", clabel->last_unit );
2851 #if 0
2852 printf(" Config order: %d\n", clabel->config_order);
2853 #endif
2854
2855 }
2856 #endif
2857
2858 RF_ConfigSet_t *
2859 rf_create_auto_sets(ac_list)
2860 RF_AutoConfig_t *ac_list;
2861 {
2862 RF_AutoConfig_t *ac;
2863 RF_ConfigSet_t *config_sets;
2864 RF_ConfigSet_t *cset;
2865 RF_AutoConfig_t *ac_next;
2866
2867
2868 config_sets = NULL;
2869
2870 /* Go through the AutoConfig list, and figure out which components
2871 belong to what sets. */
2872 ac = ac_list;
2873 while(ac!=NULL) {
2874 /* we're going to putz with ac->next, so save it here
2875 for use at the end of the loop */
2876 ac_next = ac->next;
2877
2878 if (config_sets == NULL) {
2879 /* will need at least this one... */
2880 config_sets = (RF_ConfigSet_t *)
2881 malloc(sizeof(RF_ConfigSet_t),
2882 M_RAIDFRAME, M_NOWAIT);
2883 if (config_sets == NULL) {
2884 panic("rf_create_auto_sets: No memory!\n");
2885 }
2886 /* this one is easy :) */
2887 config_sets->ac = ac;
2888 config_sets->next = NULL;
2889 config_sets->rootable = 0;
2890 ac->next = NULL;
2891 } else {
2892 /* which set does this component fit into? */
2893 cset = config_sets;
2894 while(cset!=NULL) {
2895 if (rf_does_it_fit(cset, ac)) {
2896 /* looks like it matches... */
2897 ac->next = cset->ac;
2898 cset->ac = ac;
2899 break;
2900 }
2901 cset = cset->next;
2902 }
2903 if (cset==NULL) {
2904 /* didn't find a match above... new set..*/
2905 cset = (RF_ConfigSet_t *)
2906 malloc(sizeof(RF_ConfigSet_t),
2907 M_RAIDFRAME, M_NOWAIT);
2908 if (cset == NULL) {
2909 panic("rf_create_auto_sets: No memory!\n");
2910 }
2911 cset->ac = ac;
2912 ac->next = NULL;
2913 cset->next = config_sets;
2914 cset->rootable = 0;
2915 config_sets = cset;
2916 }
2917 }
2918 ac = ac_next;
2919 }
2920
2921
2922 return(config_sets);
2923 }
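/*
 * Illustrative sketch (not part of the driver): how the list returned by
 * rf_create_auto_sets() is typically walked -- check each set, hand the
 * viable ones to rf_auto_config_set() (only hinted at here), give the
 * vnodes back otherwise, and clean everything up.  "ac_list" is the
 * result of rf_find_raid_components() above.
 */
#if 0
	RF_ConfigSet_t *cset, *next_cset;

	cset = rf_create_auto_sets(ac_list);
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			/* viable: would go on to rf_auto_config_set() */
		} else {
			/* not viable: give the vnodes back */
			rf_release_all_vps(cset);
		}
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
#endif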
2924
2925 static int
2926 rf_does_it_fit(cset, ac)
2927 RF_ConfigSet_t *cset;
2928 RF_AutoConfig_t *ac;
2929 {
2930 RF_ComponentLabel_t *clabel1, *clabel2;
2931
2932 /* If this one matches the *first* one in the set, that's good
2933 enough, since the other members of the set would have been
2934 through here too... */
2935 /* note that we are not checking partitionSize here..
2936
2937 Note that we are also not checking the mod_counters here.
2938 	   If everything else matches except the mod_counter, that's
2939 good enough for this test. We will deal with the mod_counters
2940 a little later in the autoconfiguration process.
2941
2942 (clabel1->mod_counter == clabel2->mod_counter) &&
2943
2944 The reason we don't check for this is that failed disks
2945 will have lower modification counts. If those disks are
2946 not added to the set they used to belong to, then they will
2947 form their own set, which may result in 2 different sets,
2948 for example, competing to be configured at raid0, and
2949 perhaps competing to be the root filesystem set. If the
2950 wrong ones get configured, or both attempt to become /,
2951 	   weird behaviour and/or serious lossage will occur.  Thus we
2952 need to bring them into the fold here, and kick them out at
2953 a later point.
2954
2955 */
2956
2957 clabel1 = cset->ac->clabel;
2958 clabel2 = ac->clabel;
2959 if ((clabel1->version == clabel2->version) &&
2960 (clabel1->serial_number == clabel2->serial_number) &&
2961 (clabel1->num_rows == clabel2->num_rows) &&
2962 (clabel1->num_columns == clabel2->num_columns) &&
2963 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2964 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2965 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2966 (clabel1->parityConfig == clabel2->parityConfig) &&
2967 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2968 (clabel1->blockSize == clabel2->blockSize) &&
2969 (clabel1->numBlocks == clabel2->numBlocks) &&
2970 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2971 (clabel1->root_partition == clabel2->root_partition) &&
2972 (clabel1->last_unit == clabel2->last_unit) &&
2973 (clabel1->config_order == clabel2->config_order)) {
2974 		/* if it gets here, it almost *has* to be a match */
2975 } else {
2976 /* it's not consistent with somebody in the set..
2977 punt */
2978 return(0);
2979 }
2980 /* all was fine.. it must fit... */
2981 return(1);
2982 }
2983
2984 int
2985 rf_have_enough_components(cset)
2986 RF_ConfigSet_t *cset;
2987 {
2988 RF_AutoConfig_t *ac;
2989 RF_AutoConfig_t *auto_config;
2990 RF_ComponentLabel_t *clabel;
2991 int r,c;
2992 int num_rows;
2993 int num_cols;
2994 int num_missing;
2995 int mod_counter;
2996 int mod_counter_found;
2997 int even_pair_failed;
2998 char parity_type;
2999
3000
3001 /* check to see that we have enough 'live' components
3002 of this set. If so, we can configure it if necessary */
3003
3004 num_rows = cset->ac->clabel->num_rows;
3005 num_cols = cset->ac->clabel->num_columns;
3006 parity_type = cset->ac->clabel->parityConfig;
3007
3008 /* XXX Check for duplicate components!?!?!? */
3009
3010 /* Determine what the mod_counter is supposed to be for this set. */
3011
3012 mod_counter_found = 0;
3013 mod_counter = 0;
3014 ac = cset->ac;
3015 while(ac!=NULL) {
3016 if (mod_counter_found==0) {
3017 mod_counter = ac->clabel->mod_counter;
3018 mod_counter_found = 1;
3019 } else {
3020 if (ac->clabel->mod_counter > mod_counter) {
3021 mod_counter = ac->clabel->mod_counter;
3022 }
3023 }
3024 ac = ac->next;
3025 }
3026
3027 num_missing = 0;
3028 auto_config = cset->ac;
3029
3030 for(r=0; r<num_rows; r++) {
3031 even_pair_failed = 0;
3032 for(c=0; c<num_cols; c++) {
3033 ac = auto_config;
3034 while(ac!=NULL) {
3035 if ((ac->clabel->row == r) &&
3036 (ac->clabel->column == c) &&
3037 (ac->clabel->mod_counter == mod_counter)) {
3038 /* it's this one... */
3039 #if DEBUG
3040 printf("Found: %s at %d,%d\n",
3041 ac->devname,r,c);
3042 #endif
3043 break;
3044 }
3045 ac=ac->next;
3046 }
3047 if (ac==NULL) {
3048 /* Didn't find one here! */
3049 /* special case for RAID 1, especially
3050 where there are more than 2
3051 components (where RAIDframe treats
3052 things a little differently :( ) */
3053 if (parity_type == '1') {
3054 if (c%2 == 0) { /* even component */
3055 even_pair_failed = 1;
3056 } else { /* odd component. If
3057 we're failed, and
3058 so is the even
3059 component, it's
3060 "Good Night, Charlie" */
3061 if (even_pair_failed == 1) {
3062 return(0);
3063 }
3064 }
3065 } else {
3066 /* normal accounting */
3067 num_missing++;
3068 }
3069 }
3070 if ((parity_type == '1') && (c%2 == 1)) {
3071 /* Just did an even component, and we didn't
3072 bail.. reset the even_pair_failed flag,
3073 and go on to the next component.... */
3074 even_pair_failed = 0;
3075 }
3076 }
3077 }
3078
3079 clabel = cset->ac->clabel;
3080
3081 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3082 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3083 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3084 /* XXX this needs to be made *much* more general */
3085 /* Too many failures */
3086 return(0);
3087 }
3088 /* otherwise, all is well, and we've got enough to take a kick
3089 at autoconfiguring this set */
3090 return(1);
3091 }
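/*
 * Illustrative sketch (not part of the driver): the RAID 1 special case
 * above pairs components (0,1), (2,3), ... within a row.  Losing one
 * member of each pair is survivable; losing both members of the same
 * pair is not.  The expressions below just restate that pairing for a
 * hypothetical column.
 */
#if 0
	int c = 3;				/* hypothetical column */
	int pair = c / 2;			/* component 3 pairs with 2 */
	int is_even_member = (c % 2 == 0);	/* 0: this is the odd member */
#endif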
3092
3093 void
3094 rf_create_configuration(ac,config,raidPtr)
3095 RF_AutoConfig_t *ac;
3096 RF_Config_t *config;
3097 RF_Raid_t *raidPtr;
3098 {
3099 RF_ComponentLabel_t *clabel;
3100 int i;
3101
3102 clabel = ac->clabel;
3103
3104 /* 1. Fill in the common stuff */
3105 config->numRow = clabel->num_rows;
3106 config->numCol = clabel->num_columns;
3107 config->numSpare = 0; /* XXX should this be set here? */
3108 config->sectPerSU = clabel->sectPerSU;
3109 config->SUsPerPU = clabel->SUsPerPU;
3110 config->SUsPerRU = clabel->SUsPerRU;
3111 config->parityConfig = clabel->parityConfig;
3112 /* XXX... */
3113 strcpy(config->diskQueueType,"fifo");
3114 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3115 config->layoutSpecificSize = 0; /* XXX ?? */
3116
3117 while(ac!=NULL) {
3118 /* row/col values will be in range due to the checks
3119 		   in rf_reasonable_label() */
3120 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3121 ac->devname);
3122 ac = ac->next;
3123 }
3124
3125 for(i=0;i<RF_MAXDBGV;i++) {
3126 		config->debugVars[i][0] = '\0';
3127 }
3128 }
3129
3130 int
3131 rf_set_autoconfig(raidPtr, new_value)
3132 RF_Raid_t *raidPtr;
3133 int new_value;
3134 {
3135 RF_ComponentLabel_t clabel;
3136 struct vnode *vp;
3137 dev_t dev;
3138 int row, column;
3139
3140 raidPtr->autoconfigure = new_value;
3141 for(row=0; row<raidPtr->numRow; row++) {
3142 for(column=0; column<raidPtr->numCol; column++) {
3143 if (raidPtr->Disks[row][column].status ==
3144 rf_ds_optimal) {
3145 dev = raidPtr->Disks[row][column].dev;
3146 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3147 raidread_component_label(dev, vp, &clabel);
3148 clabel.autoconfigure = new_value;
3149 raidwrite_component_label(dev, vp, &clabel);
3150 }
3151 }
3152 }
3153 return(new_value);
3154 }
3155
3156 int
3157 rf_set_rootpartition(raidPtr, new_value)
3158 RF_Raid_t *raidPtr;
3159 int new_value;
3160 {
3161 RF_ComponentLabel_t clabel;
3162 struct vnode *vp;
3163 dev_t dev;
3164 int row, column;
3165
3166 raidPtr->root_partition = new_value;
3167 for(row=0; row<raidPtr->numRow; row++) {
3168 for(column=0; column<raidPtr->numCol; column++) {
3169 if (raidPtr->Disks[row][column].status ==
3170 rf_ds_optimal) {
3171 dev = raidPtr->Disks[row][column].dev;
3172 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3173 raidread_component_label(dev, vp, &clabel);
3174 clabel.root_partition = new_value;
3175 raidwrite_component_label(dev, vp, &clabel);
3176 }
3177 }
3178 }
3179 return(new_value);
3180 }
3181
3182 void
3183 rf_release_all_vps(cset)
3184 RF_ConfigSet_t *cset;
3185 {
3186 RF_AutoConfig_t *ac;
3187
3188 ac = cset->ac;
3189 while(ac!=NULL) {
3190 /* Close the vp, and give it back */
3191 if (ac->vp) {
3192 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3193 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3194 vput(ac->vp);
3195 ac->vp = NULL;
3196 }
3197 ac = ac->next;
3198 }
3199 }
3200
3201
3202 void
3203 rf_cleanup_config_set(cset)
3204 RF_ConfigSet_t *cset;
3205 {
3206 RF_AutoConfig_t *ac;
3207 RF_AutoConfig_t *next_ac;
3208
3209 ac = cset->ac;
3210 while(ac!=NULL) {
3211 next_ac = ac->next;
3212 /* nuke the label */
3213 free(ac->clabel, M_RAIDFRAME);
3214 /* cleanup the config structure */
3215 free(ac, M_RAIDFRAME);
3216 /* "next.." */
3217 ac = next_ac;
3218 }
3219 /* and, finally, nuke the config set */
3220 free(cset, M_RAIDFRAME);
3221 }
3222
3223
3224 void
3225 raid_init_component_label(raidPtr, clabel)
3226 RF_Raid_t *raidPtr;
3227 RF_ComponentLabel_t *clabel;
3228 {
3229 /* current version number */
3230 clabel->version = RF_COMPONENT_LABEL_VERSION;
3231 clabel->serial_number = raidPtr->serial_number;
3232 clabel->mod_counter = raidPtr->mod_counter;
3233 clabel->num_rows = raidPtr->numRow;
3234 clabel->num_columns = raidPtr->numCol;
3235 clabel->clean = RF_RAID_DIRTY; /* not clean */
3236 clabel->status = rf_ds_optimal; /* "It's good!" */
3237
3238 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3239 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3240 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3241
3242 clabel->blockSize = raidPtr->bytesPerSector;
3243 clabel->numBlocks = raidPtr->sectorsPerDisk;
3244
3245 /* XXX not portable */
3246 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3247 clabel->maxOutstanding = raidPtr->maxOutstanding;
3248 clabel->autoconfigure = raidPtr->autoconfigure;
3249 clabel->root_partition = raidPtr->root_partition;
3250 clabel->last_unit = raidPtr->raidid;
3251 clabel->config_order = raidPtr->config_order;
3252 }
3253
3254 int
3255 rf_auto_config_set(cset,unit)
3256 RF_ConfigSet_t *cset;
3257 int *unit;
3258 {
3259 RF_Raid_t *raidPtr;
3260 RF_Config_t *config;
3261 int raidID;
3262 int retcode;
3263
3264 #if DEBUG
3265 printf("RAID autoconfigure\n");
3266 #endif
3267
3268 retcode = 0;
3269 *unit = -1;
3270
3271 /* 1. Create a config structure */
3272
3273 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3274 M_RAIDFRAME,
3275 M_NOWAIT);
3276 if (config==NULL) {
3277 printf("Out of mem!?!?\n");
3278 /* XXX do something more intelligent here. */
3279 return(1);
3280 }
3281
3282 memset(config, 0, sizeof(RF_Config_t));
3283
3284 /*
3285 2. Figure out what RAID ID this one is supposed to live at
3286 See if we can get the same RAID dev that it was configured
3287 on last time..
3288 */
3289
3290 raidID = cset->ac->clabel->last_unit;
3291 if ((raidID < 0) || (raidID >= numraid)) {
3292 /* let's not wander off into lala land. */
3293 raidID = numraid - 1;
3294 }
3295 if (raidPtrs[raidID]->valid != 0) {
3296
3297 /*
3298 Nope... Go looking for an alternative...
3299 		   Start the search from the top so we don't grab raid0
3300 		   just because it happens to be free.
3301 */
3302
3303 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3304 if (raidPtrs[raidID]->valid == 0) {
3305 /* can use this one! */
3306 break;
3307 }
3308 }
3309 }
3310
3311 if (raidID < 0) {
3312 /* punt... */
3313 printf("Unable to auto configure this set!\n");
3314 printf("(Out of RAID devs!)\n");
3315 return(1);
3316 }
3317
3318 #if DEBUG
3319 printf("Configuring raid%d:\n",raidID);
3320 #endif
3321
3322 raidPtr = raidPtrs[raidID];
3323
3324 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3325 raidPtr->raidid = raidID;
3326 raidPtr->openings = RAIDOUTSTANDING;
3327
3328 /* 3. Build the configuration structure */
3329 rf_create_configuration(cset->ac, config, raidPtr);
3330
3331 /* 4. Do the configuration */
3332 retcode = rf_Configure(raidPtr, config, cset->ac);
3333
3334 if (retcode == 0) {
3335
3336 raidinit(raidPtrs[raidID]);
3337
3338 rf_markalldirty(raidPtrs[raidID]);
3339 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3340 if (cset->ac->clabel->root_partition==1) {
3341 /* everything configured just fine. Make a note
3342 that this set is eligible to be root. */
3343 cset->rootable = 1;
3344 /* XXX do this here? */
3345 raidPtrs[raidID]->root_partition = 1;
3346 }
3347 }
3348
3349 /* 5. Cleanup */
3350 free(config, M_RAIDFRAME);
3351
3352 *unit = raidID;
3353 return(retcode);
3354 }
3355
3356 void
3357 rf_disk_unbusy(desc)
3358 RF_RaidAccessDesc_t *desc;
3359 {
3360 struct buf *bp;
3361
3362 bp = (struct buf *)desc->bp;
3363 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3364 (bp->b_bcount - bp->b_resid));
3365 }
3366