1 /*	$NetBSD: rf_netbsdkintf.c,v 1.134 2002/09/22 03:44:42 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.134 2002/09/22 03:44:42 oster Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 #ifdef DEBUG
157 int rf_kdebug_level = 0;
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit(RF_Raid_t *);
180
181 void raidattach(int);
182
183 dev_type_open(raidopen);
184 dev_type_close(raidclose);
185 dev_type_read(raidread);
186 dev_type_write(raidwrite);
187 dev_type_ioctl(raidioctl);
188 dev_type_strategy(raidstrategy);
189 dev_type_dump(raiddump);
190 dev_type_size(raidsize);
191
192 const struct bdevsw raid_bdevsw = {
193 raidopen, raidclose, raidstrategy, raidioctl,
194 raiddump, raidsize, D_DISK
195 };
196
197 const struct cdevsw raid_cdevsw = {
198 raidopen, raidclose, raidread, raidwrite, raidioctl,
199 nostop, notty, nopoll, nommap, D_DISK
200 };
201
202 /*
203 * Pilfered from ccd.c
204 */
205
206 struct raidbuf {
207 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
208 struct buf *rf_obp; /* ptr. to original I/O buf */
209 int rf_flags; /* misc. flags */
210 RF_DiskQueueData_t *req;/* the request that this was part of.. */
211 };
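/*
 * Because rf_buf is the first member, a struct buf handed back by the
 * component I/O completion path can be cast to its enclosing raidbuf.
 * A minimal sketch of the idiom (KernelWakeupFunc below does exactly
 * this; "vbp" is the buf pointer passed to the callback):
 *
 *	struct raidbuf *raidbp = (struct raidbuf *) vbp;
 *	struct buf *obp = raidbp->rf_obp;	(original caller's buf)
 */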
212
213 /* component buffer pool */
214 struct pool raidframe_cbufpool;
215
216 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
217 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
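/*
 * Rough sketch of the intended lifecycle of a component buffer, as used
 * by rf_DispatchKernelIO() and KernelWakeupFunc() below (error handling
 * omitted; RAIDGETBUF uses PR_NOWAIT and so can return NULL under
 * memory pressure):
 *
 *	struct raidbuf *raidbp = RAIDGETBUF(rs);
 *	InitBP(&raidbp->rf_buf, ...);
 *	VOP_STRATEGY(&raidbp->rf_buf);
 *	...
 *	RAIDPUTBUF(rs, raidbp);		(from the completion callback)
 */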
218
219 /* XXX Not sure if the following should be replacing the raidPtrs above,
220 or if it should be used in conjunction with that...
221 */
222
223 struct raid_softc {
224 int sc_flags; /* flags */
225 int sc_cflags; /* configuration flags */
226 size_t sc_size; /* size of the raid device */
227 char sc_xname[20]; /* XXX external name */
228 struct disk sc_dkdev; /* generic disk device info */
229 struct bufq_state buf_queue; /* used for the device queue */
230 };
231 /* sc_flags */
232 #define RAIDF_INITED 0x01 /* unit has been initialized */
233 #define RAIDF_WLABEL 0x02 /* label area is writable */
234 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
235 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
236 #define RAIDF_LOCKED 0x80 /* unit is locked */
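/*
 * raidlock()/raidunlock() (later in this file) serialize access to a
 * unit using the two flags above.  A hedged sketch of the conventional
 * softc lock protocol they are expected to follow (compare ccd(4));
 * not necessarily the exact code:
 *
 *	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
 *		rs->sc_flags |= RAIDF_WANTED;
 *		if ((error = tsleep(rs, PRIBIO | PCATCH, "raidlk", 0)) != 0)
 *			return (error);
 *	}
 *	rs->sc_flags |= RAIDF_LOCKED;
 *
 * raidunlock() would then clear RAIDF_LOCKED and, if RAIDF_WANTED was
 * set, clear it and wakeup(rs).
 */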
237
238 #define raidunit(x) DISKUNIT(x)
239 int numraid = 0;
240
241 /*
242 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
243 * Be aware that large numbers can allow the driver to consume a lot of
244 * kernel memory, especially on writes, and in degraded mode reads.
245 *
246 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
247 * a single 64K write will typically require 64K for the old data,
248 * 64K for the old parity, and 64K for the new parity, for a total
249 * of 192K (if the parity buffer is not re-used immediately).
250  * Even if it is used immediately, that's still 128K, which when multiplied
251 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
252 *
253 * Now in degraded mode, for example, a 64K read on the above setup may
254 * require data reconstruction, which will require *all* of the 4 remaining
255 * disks to participate -- 4 * 32K/disk == 128K again.
256 */
257
258 #ifndef RAIDOUTSTANDING
259 #define RAIDOUTSTANDING 6
260 #endif
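/*
 * Restating the arithmetic above for the default of 6 outstanding I/Os:
 * worst case is roughly 6 * 192K = 1152K of scratch memory (or
 * 6 * 128K = 768K if the parity buffer is re-used immediately), on top
 * of 6 * 64K = 384K of incoming data, for the 5-disk, 32k-stripe
 * example.  These figures are only the illustrative example above,
 * not a bound derived from the code.
 */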
261
262 #define RAIDLABELDEV(dev) \
263 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
264
265 /* declared here, and made public, for the benefit of KVM stuff.. */
266 struct raid_softc *raid_softc;
267
268 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
269 struct disklabel *);
270 static void raidgetdisklabel(dev_t);
271 static void raidmakedisklabel(struct raid_softc *);
272
273 static int raidlock(struct raid_softc *);
274 static void raidunlock(struct raid_softc *);
275
276 static void rf_markalldirty(RF_Raid_t *);
277
278 struct device *raidrootdev;
279
280 void rf_ReconThread(struct rf_recon_req *);
281 /* XXX what I want is: */
282 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
283 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
284 void rf_CopybackThread(RF_Raid_t *raidPtr);
285 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
286 void rf_buildroothack(void *);
287
288 RF_AutoConfig_t *rf_find_raid_components(void);
289 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
290 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
291 static int rf_reasonable_label(RF_ComponentLabel_t *);
292 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
293 int rf_set_autoconfig(RF_Raid_t *, int);
294 int rf_set_rootpartition(RF_Raid_t *, int);
295 void rf_release_all_vps(RF_ConfigSet_t *);
296 void rf_cleanup_config_set(RF_ConfigSet_t *);
297 int rf_have_enough_components(RF_ConfigSet_t *);
298 int rf_auto_config_set(RF_ConfigSet_t *, int *);
299
300 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
301 allow autoconfig to take place.
302 Note that this is overridden by having
303 RAID_AUTOCONFIG as an option in the
304 kernel config file. */
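/*
 * Typical kernel configuration fragment for this driver (the unit
 * count is just an example):
 *
 *	pseudo-device	raid		8	# RAIDframe disk driver
 *	options 	RAID_AUTOCONFIG		# auto-configure RAID sets
 */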
305
306 void
307 raidattach(num)
308 int num;
309 {
310 int raidID;
311 int i, rc;
312 RF_AutoConfig_t *ac_list; /* autoconfig list */
313 RF_ConfigSet_t *config_sets;
314
315 #ifdef DEBUG
316 printf("raidattach: Asked for %d units\n", num);
317 #endif
318
319 if (num <= 0) {
320 #ifdef DIAGNOSTIC
321 panic("raidattach: count <= 0");
322 #endif
323 return;
324 }
325 /* This is where all the initialization stuff gets done. */
326
327 numraid = num;
328
329 /* Make some space for requested number of units... */
330
331 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
332 if (raidPtrs == NULL) {
333 panic("raidPtrs is NULL!!\n");
334 }
335
336 /* Initialize the component buffer pool. */
337 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
338 0, 0, "raidpl", NULL);
339
340 rc = rf_mutex_init(&rf_sparet_wait_mutex);
341 if (rc) {
342 RF_PANIC();
343 }
344
345 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
346
347 for (i = 0; i < num; i++)
348 raidPtrs[i] = NULL;
349 rc = rf_BootRaidframe();
350 if (rc == 0)
351 printf("Kernelized RAIDframe activated\n");
352 else
353 panic("Serious error booting RAID!!\n");
354
355 	/* put together some data structures like the CCD device does.  This
356 * lets us lock the device and what-not when it gets opened. */
357
358 raid_softc = (struct raid_softc *)
359 malloc(num * sizeof(struct raid_softc),
360 M_RAIDFRAME, M_NOWAIT);
361 if (raid_softc == NULL) {
362 printf("WARNING: no memory for RAIDframe driver\n");
363 return;
364 }
365
366 memset(raid_softc, 0, num * sizeof(struct raid_softc));
367
368 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
369 M_RAIDFRAME, M_NOWAIT);
370 if (raidrootdev == NULL) {
371 panic("No memory for RAIDframe driver!!?!?!\n");
372 }
373
374 for (raidID = 0; raidID < num; raidID++) {
375 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
376
377 raidrootdev[raidID].dv_class = DV_DISK;
378 raidrootdev[raidID].dv_cfdata = NULL;
379 raidrootdev[raidID].dv_unit = raidID;
380 raidrootdev[raidID].dv_parent = NULL;
381 raidrootdev[raidID].dv_flags = 0;
382 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
383
384 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
385 (RF_Raid_t *));
386 if (raidPtrs[raidID] == NULL) {
387 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
388 numraid = raidID;
389 return;
390 }
391 }
392
393 #ifdef RAID_AUTOCONFIG
394 raidautoconfig = 1;
395 #endif
396
397 if (raidautoconfig) {
398 /* 1. locate all RAID components on the system */
399
400 #if DEBUG
401 printf("Searching for raid components...\n");
402 #endif
403 ac_list = rf_find_raid_components();
404
405 /* 2. sort them into their respective sets */
406
407 config_sets = rf_create_auto_sets(ac_list);
408
409 /* 3. evaluate each set and configure the valid ones
410 This gets done in rf_buildroothack() */
411
412 /* schedule the creation of the thread to do the
413 "/ on RAID" stuff */
414
415 kthread_create(rf_buildroothack,config_sets);
416
417 }
418
419 }
420
421 void
422 rf_buildroothack(arg)
423 void *arg;
424 {
425 RF_ConfigSet_t *config_sets = arg;
426 RF_ConfigSet_t *cset;
427 RF_ConfigSet_t *next_cset;
428 int retcode;
429 int raidID;
430 int rootID;
431 int num_root;
432
433 rootID = 0;
434 num_root = 0;
435 cset = config_sets;
436 while(cset != NULL ) {
437 next_cset = cset->next;
438 if (rf_have_enough_components(cset) &&
439 cset->ac->clabel->autoconfigure==1) {
440 retcode = rf_auto_config_set(cset,&raidID);
441 if (!retcode) {
442 if (cset->rootable) {
443 rootID = raidID;
444 num_root++;
445 }
446 } else {
447 /* The autoconfig didn't work :( */
448 #if DEBUG
449 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
450 #endif
451 rf_release_all_vps(cset);
452 }
453 } else {
454 /* we're not autoconfiguring this set...
455 release the associated resources */
456 rf_release_all_vps(cset);
457 }
458 /* cleanup */
459 rf_cleanup_config_set(cset);
460 cset = next_cset;
461 }
462
463 /* we found something bootable... */
464
465 if (num_root == 1) {
466 booted_device = &raidrootdev[rootID];
467 } else if (num_root > 1) {
468 /* we can't guess.. require the user to answer... */
469 boothowto |= RB_ASKNAME;
470 }
471 }
472
473
474 int
475 raidsize(dev)
476 dev_t dev;
477 {
478 struct raid_softc *rs;
479 struct disklabel *lp;
480 int part, unit, omask, size;
481
482 unit = raidunit(dev);
483 if (unit >= numraid)
484 return (-1);
485 rs = &raid_softc[unit];
486
487 if ((rs->sc_flags & RAIDF_INITED) == 0)
488 return (-1);
489
490 part = DISKPART(dev);
491 omask = rs->sc_dkdev.dk_openmask & (1 << part);
492 lp = rs->sc_dkdev.dk_label;
493
494 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
495 return (-1);
496
497 if (lp->d_partitions[part].p_fstype != FS_SWAP)
498 size = -1;
499 else
500 size = lp->d_partitions[part].p_size *
501 (lp->d_secsize / DEV_BSIZE);
502
503 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
504 return (-1);
505
506 return (size);
507
508 }
509
510 int
511 raiddump(dev, blkno, va, size)
512 dev_t dev;
513 daddr_t blkno;
514 caddr_t va;
515 size_t size;
516 {
517 /* Not implemented. */
518 return ENXIO;
519 }
520 /* ARGSUSED */
521 int
522 raidopen(dev, flags, fmt, p)
523 dev_t dev;
524 int flags, fmt;
525 struct proc *p;
526 {
527 int unit = raidunit(dev);
528 struct raid_softc *rs;
529 struct disklabel *lp;
530 int part, pmask;
531 int error = 0;
532
533 if (unit >= numraid)
534 return (ENXIO);
535 rs = &raid_softc[unit];
536
537 if ((error = raidlock(rs)) != 0)
538 return (error);
539 lp = rs->sc_dkdev.dk_label;
540
541 part = DISKPART(dev);
542 pmask = (1 << part);
543
544 db1_printf(("Opening raid device number: %d partition: %d\n",
545 unit, part));
546
547
548 if ((rs->sc_flags & RAIDF_INITED) &&
549 (rs->sc_dkdev.dk_openmask == 0))
550 raidgetdisklabel(dev);
551
552 /* make sure that this partition exists */
553
554 if (part != RAW_PART) {
555 db1_printf(("Not a raw partition..\n"));
556 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
557 ((part >= lp->d_npartitions) ||
558 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
559 error = ENXIO;
560 raidunlock(rs);
561 db1_printf(("Bailing out...\n"));
562 return (error);
563 }
564 }
565 /* Prevent this unit from being unconfigured while open. */
566 switch (fmt) {
567 case S_IFCHR:
568 rs->sc_dkdev.dk_copenmask |= pmask;
569 break;
570
571 case S_IFBLK:
572 rs->sc_dkdev.dk_bopenmask |= pmask;
573 break;
574 }
575
576 if ((rs->sc_dkdev.dk_openmask == 0) &&
577 ((rs->sc_flags & RAIDF_INITED) != 0)) {
578 /* First one... mark things as dirty... Note that we *MUST*
579 have done a configure before this. I DO NOT WANT TO BE
580 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
581 THAT THEY BELONG TOGETHER!!!!! */
582 /* XXX should check to see if we're only open for reading
583 here... If so, we needn't do this, but then need some
584 other way of keeping track of what's happened.. */
585
586 rf_markalldirty( raidPtrs[unit] );
587 }
588
589
590 rs->sc_dkdev.dk_openmask =
591 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
592
593 raidunlock(rs);
594
595 return (error);
596
597
598 }
599 /* ARGSUSED */
600 int
601 raidclose(dev, flags, fmt, p)
602 dev_t dev;
603 int flags, fmt;
604 struct proc *p;
605 {
606 int unit = raidunit(dev);
607 struct raid_softc *rs;
608 int error = 0;
609 int part;
610
611 if (unit >= numraid)
612 return (ENXIO);
613 rs = &raid_softc[unit];
614
615 if ((error = raidlock(rs)) != 0)
616 return (error);
617
618 part = DISKPART(dev);
619
620 /* ...that much closer to allowing unconfiguration... */
621 switch (fmt) {
622 case S_IFCHR:
623 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
624 break;
625
626 case S_IFBLK:
627 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
628 break;
629 }
630 rs->sc_dkdev.dk_openmask =
631 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
632
633 if ((rs->sc_dkdev.dk_openmask == 0) &&
634 ((rs->sc_flags & RAIDF_INITED) != 0)) {
635 		/* Last one...  The device is not unconfigured yet.
636 		   Device shutdown has taken care of setting the
637 		   clean bits if RAIDF_INITED is not set;
638 		   mark things as clean here... */
639 #if 0
640 printf("Last one on raid%d. Updating status.\n",unit);
641 #endif
642 rf_update_component_labels(raidPtrs[unit],
643 RF_FINAL_COMPONENT_UPDATE);
644 if (doing_shutdown) {
645 /* last one, and we're going down, so
646 lights out for this RAID set too. */
647 error = rf_Shutdown(raidPtrs[unit]);
648
649 /* It's no longer initialized... */
650 rs->sc_flags &= ~RAIDF_INITED;
651
652 /* Detach the disk. */
653 disk_detach(&rs->sc_dkdev);
654 }
655 }
656
657 raidunlock(rs);
658 return (0);
659
660 }
661
662 void
663 raidstrategy(bp)
664 struct buf *bp;
665 {
666 int s;
667
668 unsigned int raidID = raidunit(bp->b_dev);
669 RF_Raid_t *raidPtr;
670 	struct raid_softc *rs;
671 	struct disklabel *lp;
672 	int     wlabel;
673 	if (raidID >= numraid || !raidPtrs[raidID]) {
674 		bp->b_error = ENODEV;
675 		bp->b_flags |= B_ERROR;
676 		bp->b_resid = bp->b_bcount;
677 		biodone(bp);
678 		return;
679 	}
680 	rs = &raid_softc[raidID];
681 	raidPtr = raidPtrs[raidID];
682 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
683 		bp->b_error = ENXIO;
684 		bp->b_flags |= B_ERROR;
685 		bp->b_resid = bp->b_bcount;
686 		biodone(bp);
687 		return;
688 	}
689 if (!raidPtr->valid) {
690 bp->b_error = ENODEV;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 if (bp->b_bcount == 0) {
697 db1_printf(("b_bcount is zero..\n"));
698 biodone(bp);
699 return;
700 }
701 lp = rs->sc_dkdev.dk_label;
702
703 /*
704 * Do bounds checking and adjust transfer. If there's an
705 * error, the bounds check will flag that for us.
706 */
707
708 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
709 if (DISKPART(bp->b_dev) != RAW_PART)
710 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
711 db1_printf(("Bounds check failed!!:%d %d\n",
712 (int) bp->b_blkno, (int) wlabel));
713 biodone(bp);
714 return;
715 }
716 s = splbio();
717
718 bp->b_resid = 0;
719
720 /* stuff it onto our queue */
721 BUFQ_PUT(&rs->buf_queue, bp);
722
723 raidstart(raidPtrs[raidID]);
724
725 splx(s);
726 }
727 /* ARGSUSED */
728 int
729 raidread(dev, uio, flags)
730 dev_t dev;
731 struct uio *uio;
732 int flags;
733 {
734 int unit = raidunit(dev);
735 struct raid_softc *rs;
736 int part;
737
738 if (unit >= numraid)
739 return (ENXIO);
740 rs = &raid_softc[unit];
741
742 if ((rs->sc_flags & RAIDF_INITED) == 0)
743 return (ENXIO);
744 part = DISKPART(dev);
745
746 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
747
748 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
749
750 }
751 /* ARGSUSED */
752 int
753 raidwrite(dev, uio, flags)
754 dev_t dev;
755 struct uio *uio;
756 int flags;
757 {
758 int unit = raidunit(dev);
759 struct raid_softc *rs;
760
761 if (unit >= numraid)
762 return (ENXIO);
763 rs = &raid_softc[unit];
764
765 if ((rs->sc_flags & RAIDF_INITED) == 0)
766 return (ENXIO);
767 db1_printf(("raidwrite\n"));
768 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
769
770 }
771
772 int
773 raidioctl(dev, cmd, data, flag, p)
774 dev_t dev;
775 u_long cmd;
776 caddr_t data;
777 int flag;
778 struct proc *p;
779 {
780 int unit = raidunit(dev);
781 int error = 0;
782 int part, pmask;
783 struct raid_softc *rs;
784 RF_Config_t *k_cfg, *u_cfg;
785 RF_Raid_t *raidPtr;
786 RF_RaidDisk_t *diskPtr;
787 RF_AccTotals_t *totals;
788 RF_DeviceConfig_t *d_cfg, **ucfgp;
789 u_char *specific_buf;
790 int retcode = 0;
791 int row;
792 int column;
793 int raidid;
794 struct rf_recon_req *rrcopy, *rr;
795 RF_ComponentLabel_t *clabel;
796 RF_ComponentLabel_t ci_label;
797 RF_ComponentLabel_t **clabel_ptr;
798 RF_SingleComponent_t *sparePtr,*componentPtr;
799 RF_SingleComponent_t hot_spare;
800 RF_SingleComponent_t component;
801 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
802 int i, j, d;
803 #ifdef __HAVE_OLD_DISKLABEL
804 struct disklabel newlabel;
805 #endif
806
807 if (unit >= numraid)
808 return (ENXIO);
809 rs = &raid_softc[unit];
810 raidPtr = raidPtrs[unit];
811
812 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
813 (int) DISKPART(dev), (int) unit, (int) cmd));
814
815 /* Must be open for writes for these commands... */
816 switch (cmd) {
817 case DIOCSDINFO:
818 case DIOCWDINFO:
819 #ifdef __HAVE_OLD_DISKLABEL
820 case ODIOCWDINFO:
821 case ODIOCSDINFO:
822 #endif
823 case DIOCWLABEL:
824 if ((flag & FWRITE) == 0)
825 return (EBADF);
826 }
827
828 /* Must be initialized for these... */
829 switch (cmd) {
830 case DIOCGDINFO:
831 case DIOCSDINFO:
832 case DIOCWDINFO:
833 #ifdef __HAVE_OLD_DISKLABEL
834 case ODIOCGDINFO:
835 case ODIOCWDINFO:
836 case ODIOCSDINFO:
837 case ODIOCGDEFLABEL:
838 #endif
839 case DIOCGPART:
840 case DIOCWLABEL:
841 case DIOCGDEFLABEL:
842 case RAIDFRAME_SHUTDOWN:
843 case RAIDFRAME_REWRITEPARITY:
844 case RAIDFRAME_GET_INFO:
845 case RAIDFRAME_RESET_ACCTOTALS:
846 case RAIDFRAME_GET_ACCTOTALS:
847 case RAIDFRAME_KEEP_ACCTOTALS:
848 case RAIDFRAME_GET_SIZE:
849 case RAIDFRAME_FAIL_DISK:
850 case RAIDFRAME_COPYBACK:
851 case RAIDFRAME_CHECK_RECON_STATUS:
852 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
853 case RAIDFRAME_GET_COMPONENT_LABEL:
854 case RAIDFRAME_SET_COMPONENT_LABEL:
855 case RAIDFRAME_ADD_HOT_SPARE:
856 case RAIDFRAME_REMOVE_HOT_SPARE:
857 case RAIDFRAME_INIT_LABELS:
858 case RAIDFRAME_REBUILD_IN_PLACE:
859 case RAIDFRAME_CHECK_PARITY:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
864 case RAIDFRAME_SET_AUTOCONFIG:
865 case RAIDFRAME_SET_ROOT:
866 case RAIDFRAME_DELETE_COMPONENT:
867 case RAIDFRAME_INCORPORATE_HOT_SPARE:
868 if ((rs->sc_flags & RAIDF_INITED) == 0)
869 return (ENXIO);
870 }
871
872 switch (cmd) {
873
874 /* configure the system */
875 case RAIDFRAME_CONFIGURE:
876
877 if (raidPtr->valid) {
878 /* There is a valid RAID set running on this unit! */
879 printf("raid%d: Device already configured!\n",unit);
880 return(EINVAL);
881 }
882
883 /* copy-in the configuration information */
884 /* data points to a pointer to the configuration structure */
885
886 u_cfg = *((RF_Config_t **) data);
887 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
888 if (k_cfg == NULL) {
889 return (ENOMEM);
890 }
891 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
892 sizeof(RF_Config_t));
893 if (retcode) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
896 retcode));
897 return (retcode);
898 }
899 /* allocate a buffer for the layout-specific data, and copy it
900 * in */
901 if (k_cfg->layoutSpecificSize) {
902 if (k_cfg->layoutSpecificSize > 10000) {
903 /* sanity check */
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (EINVAL);
906 }
907 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
908 (u_char *));
909 if (specific_buf == NULL) {
910 RF_Free(k_cfg, sizeof(RF_Config_t));
911 return (ENOMEM);
912 }
913 retcode = copyin(k_cfg->layoutSpecific,
914 (caddr_t) specific_buf,
915 k_cfg->layoutSpecificSize);
916 if (retcode) {
917 RF_Free(k_cfg, sizeof(RF_Config_t));
918 RF_Free(specific_buf,
919 k_cfg->layoutSpecificSize);
920 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
921 retcode));
922 return (retcode);
923 }
924 } else
925 specific_buf = NULL;
926 k_cfg->layoutSpecific = specific_buf;
927
928 /* should do some kind of sanity check on the configuration.
929 * Store the sum of all the bytes in the last byte? */
930
931 /* configure the system */
932
933 /*
934 * Clear the entire RAID descriptor, just to make sure
935 * there is no stale data left in the case of a
936 * reconfiguration
937 */
938 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
939 raidPtr->raidid = unit;
940
941 retcode = rf_Configure(raidPtr, k_cfg, NULL);
942
943 if (retcode == 0) {
944
945 /* allow this many simultaneous IO's to
946 this RAID device */
947 raidPtr->openings = RAIDOUTSTANDING;
948
949 raidinit(raidPtr);
950 rf_markalldirty(raidPtr);
951 }
952 /* free the buffers. No return code here. */
953 if (k_cfg->layoutSpecificSize) {
954 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
955 }
956 RF_Free(k_cfg, sizeof(RF_Config_t));
957
958 return (retcode);
959
960 /* shutdown the system */
961 case RAIDFRAME_SHUTDOWN:
962
963 if ((error = raidlock(rs)) != 0)
964 return (error);
965
966 /*
967 * If somebody has a partition mounted, we shouldn't
968 * shutdown.
969 */
970
971 part = DISKPART(dev);
972 pmask = (1 << part);
973 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
974 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
975 (rs->sc_dkdev.dk_copenmask & pmask))) {
976 raidunlock(rs);
977 return (EBUSY);
978 }
979
980 retcode = rf_Shutdown(raidPtr);
981
982 /* It's no longer initialized... */
983 rs->sc_flags &= ~RAIDF_INITED;
984
985 /* Detach the disk. */
986 disk_detach(&rs->sc_dkdev);
987
988 raidunlock(rs);
989
990 return (retcode);
991 case RAIDFRAME_GET_COMPONENT_LABEL:
992 clabel_ptr = (RF_ComponentLabel_t **) data;
993 /* need to read the component label for the disk indicated
994 by row,column in clabel */
995
996 		/* For practice, let's get it directly from disk, rather
997 than from the in-core copy */
998 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
999 (RF_ComponentLabel_t *));
1000 if (clabel == NULL)
1001 return (ENOMEM);
1002
1003 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1004
1005 retcode = copyin( *clabel_ptr, clabel,
1006 sizeof(RF_ComponentLabel_t));
1007
1008 if (retcode) {
1009 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1010 return(retcode);
1011 }
1012
1013 row = clabel->row;
1014 column = clabel->column;
1015
1016 if ((row < 0) || (row >= raidPtr->numRow) ||
1017 (column < 0) || (column >= raidPtr->numCol +
1018 raidPtr->numSpare)) {
1019 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1020 return(EINVAL);
1021 }
1022
1023 raidread_component_label(raidPtr->Disks[row][column].dev,
1024 raidPtr->raid_cinfo[row][column].ci_vp,
1025 clabel );
1026
1027 retcode = copyout((caddr_t) clabel,
1028 (caddr_t) *clabel_ptr,
1029 sizeof(RF_ComponentLabel_t));
1030 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1031 return (retcode);
1032
1033 case RAIDFRAME_SET_COMPONENT_LABEL:
1034 clabel = (RF_ComponentLabel_t *) data;
1035
1036 /* XXX check the label for valid stuff... */
1037 /* Note that some things *should not* get modified --
1038 the user should be re-initing the labels instead of
1039 trying to patch things.
1040 */
1041
1042 raidid = raidPtr->raidid;
1043 printf("raid%d: Got component label:\n", raidid);
1044 printf("raid%d: Version: %d\n", raidid, clabel->version);
1045 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1046 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1047 printf("raid%d: Row: %d\n", raidid, clabel->row);
1048 printf("raid%d: Column: %d\n", raidid, clabel->column);
1049 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1050 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1051 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1052 printf("raid%d: Status: %d\n", raidid, clabel->status);
1053
1054 row = clabel->row;
1055 column = clabel->column;
1056
1057 if ((row < 0) || (row >= raidPtr->numRow) ||
1058 (column < 0) || (column >= raidPtr->numCol)) {
1059 return(EINVAL);
1060 }
1061
1062 /* XXX this isn't allowed to do anything for now :-) */
1063
1064 /* XXX and before it is, we need to fill in the rest
1065 of the fields!?!?!?! */
1066 #if 0
1067 raidwrite_component_label(
1068 raidPtr->Disks[row][column].dev,
1069 raidPtr->raid_cinfo[row][column].ci_vp,
1070 clabel );
1071 #endif
1072 return (0);
1073
1074 case RAIDFRAME_INIT_LABELS:
1075 clabel = (RF_ComponentLabel_t *) data;
1076 /*
1077 we only want the serial number from
1078 the above. We get all the rest of the information
1079 from the config that was used to create this RAID
1080 set.
1081 */
1082
1083 raidPtr->serial_number = clabel->serial_number;
1084
1085 raid_init_component_label(raidPtr, &ci_label);
1086 ci_label.serial_number = clabel->serial_number;
1087
1088 for(row=0;row<raidPtr->numRow;row++) {
1089 ci_label.row = row;
1090 for(column=0;column<raidPtr->numCol;column++) {
1091 diskPtr = &raidPtr->Disks[row][column];
1092 if (!RF_DEAD_DISK(diskPtr->status)) {
1093 ci_label.partitionSize = diskPtr->partitionSize;
1094 ci_label.column = column;
1095 raidwrite_component_label(
1096 raidPtr->Disks[row][column].dev,
1097 raidPtr->raid_cinfo[row][column].ci_vp,
1098 &ci_label );
1099 }
1100 }
1101 }
1102
1103 return (retcode);
1104 case RAIDFRAME_SET_AUTOCONFIG:
1105 d = rf_set_autoconfig(raidPtr, *(int *) data);
1106 printf("raid%d: New autoconfig value is: %d\n",
1107 raidPtr->raidid, d);
1108 *(int *) data = d;
1109 return (retcode);
1110
1111 case RAIDFRAME_SET_ROOT:
1112 d = rf_set_rootpartition(raidPtr, *(int *) data);
1113 printf("raid%d: New rootpartition value is: %d\n",
1114 raidPtr->raidid, d);
1115 *(int *) data = d;
1116 return (retcode);
1117
1118 /* initialize all parity */
1119 case RAIDFRAME_REWRITEPARITY:
1120
1121 if (raidPtr->Layout.map->faultsTolerated == 0) {
1122 /* Parity for RAID 0 is trivially correct */
1123 raidPtr->parity_good = RF_RAID_CLEAN;
1124 return(0);
1125 }
1126
1127 if (raidPtr->parity_rewrite_in_progress == 1) {
1128 /* Re-write is already in progress! */
1129 return(EINVAL);
1130 }
1131
1132 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1133 rf_RewriteParityThread,
1134 raidPtr,"raid_parity");
1135 return (retcode);
1136
1137
1138 case RAIDFRAME_ADD_HOT_SPARE:
1139 sparePtr = (RF_SingleComponent_t *) data;
1140 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1141 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1142 return(retcode);
1143
1144 case RAIDFRAME_REMOVE_HOT_SPARE:
1145 return(retcode);
1146
1147 case RAIDFRAME_DELETE_COMPONENT:
1148 componentPtr = (RF_SingleComponent_t *)data;
1149 memcpy( &component, componentPtr,
1150 sizeof(RF_SingleComponent_t));
1151 retcode = rf_delete_component(raidPtr, &component);
1152 return(retcode);
1153
1154 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1155 componentPtr = (RF_SingleComponent_t *)data;
1156 memcpy( &component, componentPtr,
1157 sizeof(RF_SingleComponent_t));
1158 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1159 return(retcode);
1160
1161 case RAIDFRAME_REBUILD_IN_PLACE:
1162
1163 if (raidPtr->Layout.map->faultsTolerated == 0) {
1164 /* Can't do this on a RAID 0!! */
1165 return(EINVAL);
1166 }
1167
1168 if (raidPtr->recon_in_progress == 1) {
1169 /* a reconstruct is already in progress! */
1170 return(EINVAL);
1171 }
1172
1173 componentPtr = (RF_SingleComponent_t *) data;
1174 memcpy( &component, componentPtr,
1175 sizeof(RF_SingleComponent_t));
1176 row = component.row;
1177 column = component.column;
1178 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1179 row, column);
1180 if ((row < 0) || (row >= raidPtr->numRow) ||
1181 (column < 0) || (column >= raidPtr->numCol)) {
1182 return(EINVAL);
1183 }
1184
1185 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1186 if (rrcopy == NULL)
1187 return(ENOMEM);
1188
1189 rrcopy->raidPtr = (void *) raidPtr;
1190 rrcopy->row = row;
1191 rrcopy->col = column;
1192
1193 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1194 rf_ReconstructInPlaceThread,
1195 rrcopy,"raid_reconip");
1196 return(retcode);
1197
1198 case RAIDFRAME_GET_INFO:
1199 if (!raidPtr->valid)
1200 return (ENODEV);
1201 ucfgp = (RF_DeviceConfig_t **) data;
1202 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1203 (RF_DeviceConfig_t *));
1204 if (d_cfg == NULL)
1205 return (ENOMEM);
1206 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1207 d_cfg->rows = raidPtr->numRow;
1208 d_cfg->cols = raidPtr->numCol;
1209 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1210 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1211 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1212 return (ENOMEM);
1213 }
1214 d_cfg->nspares = raidPtr->numSpare;
1215 if (d_cfg->nspares >= RF_MAX_DISKS) {
1216 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1217 return (ENOMEM);
1218 }
1219 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1220 d = 0;
1221 for (i = 0; i < d_cfg->rows; i++) {
1222 for (j = 0; j < d_cfg->cols; j++) {
1223 d_cfg->devs[d] = raidPtr->Disks[i][j];
1224 d++;
1225 }
1226 }
1227 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1228 d_cfg->spares[i] = raidPtr->Disks[0][j];
1229 }
1230 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1231 sizeof(RF_DeviceConfig_t));
1232 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1233
1234 return (retcode);
1235
1236 case RAIDFRAME_CHECK_PARITY:
1237 *(int *) data = raidPtr->parity_good;
1238 return (0);
1239
1240 case RAIDFRAME_RESET_ACCTOTALS:
1241 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1242 return (0);
1243
1244 case RAIDFRAME_GET_ACCTOTALS:
1245 totals = (RF_AccTotals_t *) data;
1246 *totals = raidPtr->acc_totals;
1247 return (0);
1248
1249 case RAIDFRAME_KEEP_ACCTOTALS:
1250 raidPtr->keep_acc_totals = *(int *)data;
1251 return (0);
1252
1253 case RAIDFRAME_GET_SIZE:
1254 *(int *) data = raidPtr->totalSectors;
1255 return (0);
1256
1257 /* fail a disk & optionally start reconstruction */
1258 case RAIDFRAME_FAIL_DISK:
1259
1260 if (raidPtr->Layout.map->faultsTolerated == 0) {
1261 /* Can't do this on a RAID 0!! */
1262 return(EINVAL);
1263 }
1264
1265 rr = (struct rf_recon_req *) data;
1266
1267 if (rr->row < 0 || rr->row >= raidPtr->numRow
1268 || rr->col < 0 || rr->col >= raidPtr->numCol)
1269 return (EINVAL);
1270
1271 printf("raid%d: Failing the disk: row: %d col: %d\n",
1272 unit, rr->row, rr->col);
1273
1274 /* make a copy of the recon request so that we don't rely on
1275 * the user's buffer */
1276 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1277 if (rrcopy == NULL)
1278 return(ENOMEM);
1279 memcpy(rrcopy, rr, sizeof(*rr));
1280 rrcopy->raidPtr = (void *) raidPtr;
1281
1282 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1283 rf_ReconThread,
1284 rrcopy,"raid_recon");
1285 return (0);
1286
1287 /* invoke a copyback operation after recon on whatever disk
1288 * needs it, if any */
1289 case RAIDFRAME_COPYBACK:
1290
1291 if (raidPtr->Layout.map->faultsTolerated == 0) {
1292 /* This makes no sense on a RAID 0!! */
1293 return(EINVAL);
1294 }
1295
1296 if (raidPtr->copyback_in_progress == 1) {
1297 /* Copyback is already in progress! */
1298 return(EINVAL);
1299 }
1300
1301 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1302 rf_CopybackThread,
1303 raidPtr,"raid_copyback");
1304 return (retcode);
1305
1306 /* return the percentage completion of reconstruction */
1307 case RAIDFRAME_CHECK_RECON_STATUS:
1308 if (raidPtr->Layout.map->faultsTolerated == 0) {
1309 /* This makes no sense on a RAID 0, so tell the
1310 user it's done. */
1311 *(int *) data = 100;
1312 return(0);
1313 }
1314 row = 0; /* XXX we only consider a single row... */
1315 if (raidPtr->status[row] != rf_rs_reconstructing)
1316 *(int *) data = 100;
1317 else
1318 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1319 return (0);
1320 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1321 progressInfoPtr = (RF_ProgressInfo_t **) data;
1322 row = 0; /* XXX we only consider a single row... */
1323 if (raidPtr->status[row] != rf_rs_reconstructing) {
1324 progressInfo.remaining = 0;
1325 progressInfo.completed = 100;
1326 progressInfo.total = 100;
1327 } else {
1328 progressInfo.total =
1329 raidPtr->reconControl[row]->numRUsTotal;
1330 progressInfo.completed =
1331 raidPtr->reconControl[row]->numRUsComplete;
1332 progressInfo.remaining = progressInfo.total -
1333 progressInfo.completed;
1334 }
1335 retcode = copyout((caddr_t) &progressInfo,
1336 (caddr_t) *progressInfoPtr,
1337 sizeof(RF_ProgressInfo_t));
1338 return (retcode);
1339
1340 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1341 if (raidPtr->Layout.map->faultsTolerated == 0) {
1342 /* This makes no sense on a RAID 0, so tell the
1343 user it's done. */
1344 *(int *) data = 100;
1345 return(0);
1346 }
1347 if (raidPtr->parity_rewrite_in_progress == 1) {
1348 *(int *) data = 100 *
1349 raidPtr->parity_rewrite_stripes_done /
1350 raidPtr->Layout.numStripe;
1351 } else {
1352 *(int *) data = 100;
1353 }
1354 return (0);
1355
1356 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1357 progressInfoPtr = (RF_ProgressInfo_t **) data;
1358 if (raidPtr->parity_rewrite_in_progress == 1) {
1359 progressInfo.total = raidPtr->Layout.numStripe;
1360 progressInfo.completed =
1361 raidPtr->parity_rewrite_stripes_done;
1362 progressInfo.remaining = progressInfo.total -
1363 progressInfo.completed;
1364 } else {
1365 progressInfo.remaining = 0;
1366 progressInfo.completed = 100;
1367 progressInfo.total = 100;
1368 }
1369 retcode = copyout((caddr_t) &progressInfo,
1370 (caddr_t) *progressInfoPtr,
1371 sizeof(RF_ProgressInfo_t));
1372 return (retcode);
1373
1374 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1375 if (raidPtr->Layout.map->faultsTolerated == 0) {
1376 /* This makes no sense on a RAID 0 */
1377 *(int *) data = 100;
1378 return(0);
1379 }
1380 if (raidPtr->copyback_in_progress == 1) {
1381 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1382 raidPtr->Layout.numStripe;
1383 } else {
1384 *(int *) data = 100;
1385 }
1386 return (0);
1387
1388 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1389 progressInfoPtr = (RF_ProgressInfo_t **) data;
1390 if (raidPtr->copyback_in_progress == 1) {
1391 progressInfo.total = raidPtr->Layout.numStripe;
1392 progressInfo.completed =
1393 raidPtr->copyback_stripes_done;
1394 progressInfo.remaining = progressInfo.total -
1395 progressInfo.completed;
1396 } else {
1397 progressInfo.remaining = 0;
1398 progressInfo.completed = 100;
1399 progressInfo.total = 100;
1400 }
1401 retcode = copyout((caddr_t) &progressInfo,
1402 (caddr_t) *progressInfoPtr,
1403 sizeof(RF_ProgressInfo_t));
1404 return (retcode);
1405
1406 /* the sparetable daemon calls this to wait for the kernel to
1407 * need a spare table. this ioctl does not return until a
1408 * spare table is needed. XXX -- calling mpsleep here in the
1409 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1410 * -- I should either compute the spare table in the kernel,
1411 * or have a different -- XXX XXX -- interface (a different
1412 * character device) for delivering the table -- XXX */
1413 #if 0
1414 case RAIDFRAME_SPARET_WAIT:
1415 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1416 while (!rf_sparet_wait_queue)
1417 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1418 waitreq = rf_sparet_wait_queue;
1419 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1420 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1421
1422 /* structure assignment */
1423 *((RF_SparetWait_t *) data) = *waitreq;
1424
1425 RF_Free(waitreq, sizeof(*waitreq));
1426 return (0);
1427
1428 /* wakes up a process waiting on SPARET_WAIT and puts an error
1429 	 * code in it that will cause the daemon to exit */
1430 case RAIDFRAME_ABORT_SPARET_WAIT:
1431 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1432 waitreq->fcol = -1;
1433 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1434 waitreq->next = rf_sparet_wait_queue;
1435 rf_sparet_wait_queue = waitreq;
1436 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1437 wakeup(&rf_sparet_wait_queue);
1438 return (0);
1439
1440 /* used by the spare table daemon to deliver a spare table
1441 * into the kernel */
1442 case RAIDFRAME_SEND_SPARET:
1443
1444 /* install the spare table */
1445 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1446
1447 /* respond to the requestor. the return status of the spare
1448 * table installation is passed in the "fcol" field */
1449 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1450 waitreq->fcol = retcode;
1451 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1452 waitreq->next = rf_sparet_resp_queue;
1453 rf_sparet_resp_queue = waitreq;
1454 wakeup(&rf_sparet_resp_queue);
1455 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1456
1457 return (retcode);
1458 #endif
1459
1460 default:
1461 break; /* fall through to the os-specific code below */
1462
1463 }
1464
1465 if (!raidPtr->valid)
1466 return (EINVAL);
1467
1468 /*
1469 * Add support for "regular" device ioctls here.
1470 */
1471
1472 switch (cmd) {
1473 case DIOCGDINFO:
1474 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1475 break;
1476 #ifdef __HAVE_OLD_DISKLABEL
1477 case ODIOCGDINFO:
1478 newlabel = *(rs->sc_dkdev.dk_label);
1479 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1480 return ENOTTY;
1481 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1482 break;
1483 #endif
1484
1485 case DIOCGPART:
1486 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1487 ((struct partinfo *) data)->part =
1488 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1489 break;
1490
1491 case DIOCWDINFO:
1492 case DIOCSDINFO:
1493 #ifdef __HAVE_OLD_DISKLABEL
1494 case ODIOCWDINFO:
1495 case ODIOCSDINFO:
1496 #endif
1497 {
1498 struct disklabel *lp;
1499 #ifdef __HAVE_OLD_DISKLABEL
1500 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1501 memset(&newlabel, 0, sizeof newlabel);
1502 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1503 lp = &newlabel;
1504 } else
1505 #endif
1506 lp = (struct disklabel *)data;
1507
1508 if ((error = raidlock(rs)) != 0)
1509 return (error);
1510
1511 rs->sc_flags |= RAIDF_LABELLING;
1512
1513 error = setdisklabel(rs->sc_dkdev.dk_label,
1514 lp, 0, rs->sc_dkdev.dk_cpulabel);
1515 if (error == 0) {
1516 if (cmd == DIOCWDINFO
1517 #ifdef __HAVE_OLD_DISKLABEL
1518 || cmd == ODIOCWDINFO
1519 #endif
1520 )
1521 error = writedisklabel(RAIDLABELDEV(dev),
1522 raidstrategy, rs->sc_dkdev.dk_label,
1523 rs->sc_dkdev.dk_cpulabel);
1524 }
1525 rs->sc_flags &= ~RAIDF_LABELLING;
1526
1527 raidunlock(rs);
1528
1529 if (error)
1530 return (error);
1531 break;
1532 }
1533
1534 case DIOCWLABEL:
1535 if (*(int *) data != 0)
1536 rs->sc_flags |= RAIDF_WLABEL;
1537 else
1538 rs->sc_flags &= ~RAIDF_WLABEL;
1539 break;
1540
1541 case DIOCGDEFLABEL:
1542 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1543 break;
1544
1545 #ifdef __HAVE_OLD_DISKLABEL
1546 case ODIOCGDEFLABEL:
1547 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1548 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1549 return ENOTTY;
1550 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1551 break;
1552 #endif
1553
1554 default:
1555 retcode = ENOTTY;
1556 }
1557 return (retcode);
1558
1559 }
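/*
 * Hedged sketch of how a user-level tool (raidctl(8)-like; hypothetical
 * code, error handling abbreviated) would drive the RAIDframe ioctls
 * handled above.  Note that RAIDFRAME_CONFIGURE and RAIDFRAME_GET_INFO
 * pass the address of a *pointer* to the caller's structure, matching
 * the *((RF_Config_t **) data) and (RF_DeviceConfig_t **) data casts
 * in raidioctl():
 *
 *	int fd = open("/dev/rraid0d", O_RDWR, 0);   (raw partition, example)
 *	RF_Config_t cfg, *cfgp = &cfg;              (filled in by the tool)
 *	if (ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp) == -1)
 *		err(1, "RAIDFRAME_CONFIGURE");
 *
 *	RF_DeviceConfig_t devcfg, *devcfgp = &devcfg;
 *	if (ioctl(fd, RAIDFRAME_GET_INFO, &devcfgp) == -1)
 *		err(1, "RAIDFRAME_GET_INFO");
 */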
1560
1561
1562 /* raidinit -- complete the rest of the initialization for the
1563 RAIDframe device. */
1564
1565
1566 static void
1567 raidinit(raidPtr)
1568 RF_Raid_t *raidPtr;
1569 {
1570 struct raid_softc *rs;
1571 int unit;
1572
1573 unit = raidPtr->raidid;
1574
1575 rs = &raid_softc[unit];
1576
1577 /* XXX should check return code first... */
1578 rs->sc_flags |= RAIDF_INITED;
1579
1580 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1581
1582 rs->sc_dkdev.dk_name = rs->sc_xname;
1583
1584 /* disk_attach actually creates space for the CPU disklabel, among
1585 * other things, so it's critical to call this *BEFORE* we try putzing
1586 * with disklabels. */
1587
1588 disk_attach(&rs->sc_dkdev);
1589
1590 /* XXX There may be a weird interaction here between this, and
1591 * protectedSectors, as used in RAIDframe. */
1592
1593 rs->sc_size = raidPtr->totalSectors;
1594
1595 }
1596
1597 /* wake up the daemon & tell it to get us a spare table
1598 * XXX
1599 * the entries in the queues should be tagged with the raidPtr
1600 * so that in the extremely rare case that two recons happen at once,
1601  * we know for which device we're requesting a spare table
1602 * XXX
1603 *
1604 * XXX This code is not currently used. GO
1605 */
1606 int
1607 rf_GetSpareTableFromDaemon(req)
1608 RF_SparetWait_t *req;
1609 {
1610 int retcode;
1611
1612 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1613 req->next = rf_sparet_wait_queue;
1614 rf_sparet_wait_queue = req;
1615 wakeup(&rf_sparet_wait_queue);
1616
1617 /* mpsleep unlocks the mutex */
1618 while (!rf_sparet_resp_queue) {
1619 tsleep(&rf_sparet_resp_queue, PRIBIO,
1620 "raidframe getsparetable", 0);
1621 }
1622 req = rf_sparet_resp_queue;
1623 rf_sparet_resp_queue = req->next;
1624 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1625
1626 retcode = req->fcol;
1627 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1628 * alloc'd */
1629 return (retcode);
1630 }
1631
1632 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1633 * bp & passes it down.
1634 * any calls originating in the kernel must use non-blocking I/O
1635 * do some extra sanity checking to return "appropriate" error values for
1636 * certain conditions (to make some standard utilities work)
1637 *
1638 * Formerly known as: rf_DoAccessKernel
1639 */
1640 void
1641 raidstart(raidPtr)
1642 RF_Raid_t *raidPtr;
1643 {
1644 RF_SectorCount_t num_blocks, pb, sum;
1645 RF_RaidAddr_t raid_addr;
1646 int retcode;
1647 struct partition *pp;
1648 daddr_t blocknum;
1649 int unit;
1650 struct raid_softc *rs;
1651 int do_async;
1652 struct buf *bp;
1653
1654 unit = raidPtr->raidid;
1655 rs = &raid_softc[unit];
1656
1657 /* quick check to see if anything has died recently */
1658 RF_LOCK_MUTEX(raidPtr->mutex);
1659 if (raidPtr->numNewFailures > 0) {
1660 rf_update_component_labels(raidPtr,
1661 RF_NORMAL_COMPONENT_UPDATE);
1662 raidPtr->numNewFailures--;
1663 }
1664
1665 /* Check to see if we're at the limit... */
1666 while (raidPtr->openings > 0) {
1667 RF_UNLOCK_MUTEX(raidPtr->mutex);
1668
1669 /* get the next item, if any, from the queue */
1670 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1671 /* nothing more to do */
1672 return;
1673 }
1674
1675 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1676 * partition.. Need to make it absolute to the underlying
1677 * device.. */
1678
1679 blocknum = bp->b_blkno;
1680 if (DISKPART(bp->b_dev) != RAW_PART) {
1681 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1682 blocknum += pp->p_offset;
1683 }
1684
1685 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1686 (int) blocknum));
1687
1688 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1689 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1690
1691 /* *THIS* is where we adjust what block we're going to...
1692 * but DO NOT TOUCH bp->b_blkno!!! */
1693 raid_addr = blocknum;
1694
1695 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1696 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1697 sum = raid_addr + num_blocks + pb;
1698 if (1 || rf_debugKernelAccess) {
1699 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1700 (int) raid_addr, (int) sum, (int) num_blocks,
1701 (int) pb, (int) bp->b_resid));
1702 }
1703 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1704 || (sum < num_blocks) || (sum < pb)) {
1705 bp->b_error = ENOSPC;
1706 bp->b_flags |= B_ERROR;
1707 bp->b_resid = bp->b_bcount;
1708 biodone(bp);
1709 RF_LOCK_MUTEX(raidPtr->mutex);
1710 continue;
1711 }
1712 /*
1713 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1714 */
1715
1716 if (bp->b_bcount & raidPtr->sectorMask) {
1717 bp->b_error = EINVAL;
1718 bp->b_flags |= B_ERROR;
1719 bp->b_resid = bp->b_bcount;
1720 biodone(bp);
1721 RF_LOCK_MUTEX(raidPtr->mutex);
1722 continue;
1723
1724 }
1725 db1_printf(("Calling DoAccess..\n"));
1726
1727
1728 RF_LOCK_MUTEX(raidPtr->mutex);
1729 raidPtr->openings--;
1730 RF_UNLOCK_MUTEX(raidPtr->mutex);
1731
1732 /*
1733 * Everything is async.
1734 */
1735 do_async = 1;
1736
1737 disk_busy(&rs->sc_dkdev);
1738
1739 /* XXX we're still at splbio() here... do we *really*
1740 need to be? */
1741
1742 /* don't ever condition on bp->b_flags & B_WRITE.
1743 * always condition on B_READ instead */
1744
1745 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1746 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1747 do_async, raid_addr, num_blocks,
1748 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1749
1750 RF_LOCK_MUTEX(raidPtr->mutex);
1751 }
1752 RF_UNLOCK_MUTEX(raidPtr->mutex);
1753 }
1754
1755
1756
1757
1758 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1759
1760 int
1761 rf_DispatchKernelIO(queue, req)
1762 RF_DiskQueue_t *queue;
1763 RF_DiskQueueData_t *req;
1764 {
1765 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1766 struct buf *bp;
1767 struct raidbuf *raidbp = NULL;
1768
1769 req->queue = queue;
1770
1771 #if DIAGNOSTIC
1772 if (queue->raidPtr->raidid >= numraid) {
1773 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, numraid);
1774 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1775 }
1776 #endif
1777
1778 bp = req->bp;
1779 #if 1
1780 /* XXX when there is a physical disk failure, someone is passing us a
1781 * buffer that contains old stuff!! Attempt to deal with this problem
1782 * without taking a performance hit... (not sure where the real bug
1783 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1784
1785 if (bp->b_flags & B_ERROR) {
1786 bp->b_flags &= ~B_ERROR;
1787 }
1788 if (bp->b_error != 0) {
1789 bp->b_error = 0;
1790 }
1791 #endif
1792 raidbp = RAIDGETBUF(rs);
1793
1794 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1795
1796 /*
1797 * context for raidiodone
1798 */
1799 raidbp->rf_obp = bp;
1800 raidbp->req = req;
1801
1802 LIST_INIT(&raidbp->rf_buf.b_dep);
1803
1804 switch (req->type) {
1805 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1806 /* XXX need to do something extra here.. */
1807 /* I'm leaving this in, as I've never actually seen it used,
1808 * and I'd like folks to report it... GO */
1809 		printf("WAKEUP CALLED\n");
1810 queue->numOutstanding++;
1811
1812 /* XXX need to glue the original buffer into this?? */
1813
1814 KernelWakeupFunc(&raidbp->rf_buf);
1815 break;
1816
1817 case RF_IO_TYPE_READ:
1818 case RF_IO_TYPE_WRITE:
1819
1820 if (req->tracerec) {
1821 RF_ETIMER_START(req->tracerec->timer);
1822 }
1823 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1824 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1825 req->sectorOffset, req->numSector,
1826 req->buf, KernelWakeupFunc, (void *) req,
1827 queue->raidPtr->logBytesPerSector, req->b_proc);
1828
1829 if (rf_debugKernelAccess) {
1830 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1831 (long) bp->b_blkno));
1832 }
1833 queue->numOutstanding++;
1834 queue->last_deq_sector = req->sectorOffset;
1835 /* acc wouldn't have been let in if there were any pending
1836 * reqs at any other priority */
1837 queue->curPriority = req->priority;
1838
1839 db1_printf(("Going for %c to unit %d row %d col %d\n",
1840 req->type, queue->raidPtr->raidid,
1841 queue->row, queue->col));
1842 db1_printf(("sector %d count %d (%d bytes) %d\n",
1843 (int) req->sectorOffset, (int) req->numSector,
1844 (int) (req->numSector <<
1845 queue->raidPtr->logBytesPerSector),
1846 (int) queue->raidPtr->logBytesPerSector));
1847 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1848 raidbp->rf_buf.b_vp->v_numoutput++;
1849 }
1850 VOP_STRATEGY(&raidbp->rf_buf);
1851
1852 break;
1853
1854 default:
1855 panic("bad req->type in rf_DispatchKernelIO");
1856 }
1857 db1_printf(("Exiting from DispatchKernelIO\n"));
1858
1859 return (0);
1860 }
1861 /* this is the callback function associated with an I/O invoked from
1862 kernel code.
1863 */
1864 static void
1865 KernelWakeupFunc(vbp)
1866 struct buf *vbp;
1867 {
1868 RF_DiskQueueData_t *req = NULL;
1869 RF_DiskQueue_t *queue;
1870 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1871 struct buf *bp;
1872 struct raid_softc *rs;
1873 int unit;
1874 int s;
1875
1876 s = splbio();
1877 db1_printf(("recovering the request queue:\n"));
1878 req = raidbp->req;
1879
1880 bp = raidbp->rf_obp;
1881
1882 queue = (RF_DiskQueue_t *) req->queue;
1883
1884 if (raidbp->rf_buf.b_flags & B_ERROR) {
1885 bp->b_flags |= B_ERROR;
1886 bp->b_error = raidbp->rf_buf.b_error ?
1887 raidbp->rf_buf.b_error : EIO;
1888 }
1889
1890 /* XXX methinks this could be wrong... */
1891 #if 1
1892 bp->b_resid = raidbp->rf_buf.b_resid;
1893 #endif
1894
1895 if (req->tracerec) {
1896 RF_ETIMER_STOP(req->tracerec->timer);
1897 RF_ETIMER_EVAL(req->tracerec->timer);
1898 RF_LOCK_MUTEX(rf_tracing_mutex);
1899 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1900 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1901 req->tracerec->num_phys_ios++;
1902 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1903 }
1904 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1905
1906 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1907
1908
1909 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1910 * ballistic, and mark the component as hosed... */
1911
1912 if (bp->b_flags & B_ERROR) {
1913 /* Mark the disk as dead */
1914 /* but only mark it once... */
1915 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1916 rf_ds_optimal) {
1917 printf("raid%d: IO Error. Marking %s as failed.\n",
1918 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1919 queue->raidPtr->Disks[queue->row][queue->col].status =
1920 rf_ds_failed;
1921 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1922 queue->raidPtr->numFailures++;
1923 queue->raidPtr->numNewFailures++;
1924 } else { /* Disk is already dead... */
1925 /* printf("Disk already marked as dead!\n"); */
1926 }
1927
1928 }
1929
1930 rs = &raid_softc[unit];
1931 RAIDPUTBUF(rs, raidbp);
1932
1933 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1934 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1935
1936 splx(s);
1937 }
1938
1939
1940
1941 /*
1942 * initialize a buf structure for doing an I/O in the kernel.
1943 */
1944 static void
1945 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1946 logBytesPerSector, b_proc)
1947 struct buf *bp;
1948 struct vnode *b_vp;
1949 unsigned rw_flag;
1950 dev_t dev;
1951 RF_SectorNum_t startSect;
1952 RF_SectorCount_t numSect;
1953 caddr_t buf;
1954 void (*cbFunc) (struct buf *);
1955 void *cbArg;
1956 int logBytesPerSector;
1957 struct proc *b_proc;
1958 {
1959 /* bp->b_flags = B_PHYS | rw_flag; */
1960 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1961 bp->b_bcount = numSect << logBytesPerSector;
1962 bp->b_bufsize = bp->b_bcount;
1963 bp->b_error = 0;
1964 bp->b_dev = dev;
1965 bp->b_data = buf;
1966 bp->b_blkno = startSect;
1967 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1968 if (bp->b_bcount == 0) {
1969 panic("bp->b_bcount is zero in InitBP!!\n");
1970 }
1971 bp->b_proc = b_proc;
1972 bp->b_iodone = cbFunc;
1973 bp->b_vp = b_vp;
1974
1975 }
1976
1977 static void
1978 raidgetdefaultlabel(raidPtr, rs, lp)
1979 RF_Raid_t *raidPtr;
1980 struct raid_softc *rs;
1981 struct disklabel *lp;
1982 {
1983 db1_printf(("Building a default label...\n"));
1984 memset(lp, 0, sizeof(*lp));
1985
1986 /* fabricate a label... */
1987 lp->d_secperunit = raidPtr->totalSectors;
1988 lp->d_secsize = raidPtr->bytesPerSector;
1989 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1990 lp->d_ntracks = 4 * raidPtr->numCol;
1991 lp->d_ncylinders = raidPtr->totalSectors /
1992 (lp->d_nsectors * lp->d_ntracks);
1993 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1994
1995 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1996 lp->d_type = DTYPE_RAID;
1997 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1998 lp->d_rpm = 3600;
1999 lp->d_interleave = 1;
2000 lp->d_flags = 0;
2001
2002 lp->d_partitions[RAW_PART].p_offset = 0;
2003 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2004 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2005 lp->d_npartitions = RAW_PART + 1;
2006
2007 lp->d_magic = DISKMAGIC;
2008 lp->d_magic2 = DISKMAGIC;
2009 lp->d_checksum = dkcksum(lp);
2010
2011 }
2012 /*
2013 * Read the disklabel from the raid device. If one is not present, fake one
2014 * up.
2015 */
2016 static void
2017 raidgetdisklabel(dev)
2018 dev_t dev;
2019 {
2020 int unit = raidunit(dev);
2021 struct raid_softc *rs = &raid_softc[unit];
2022 char *errstring;
2023 struct disklabel *lp = rs->sc_dkdev.dk_label;
2024 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2025 RF_Raid_t *raidPtr;
2026
2027 db1_printf(("Getting the disklabel...\n"));
2028
2029 memset(clp, 0, sizeof(*clp));
2030
2031 raidPtr = raidPtrs[unit];
2032
2033 raidgetdefaultlabel(raidPtr, rs, lp);
2034
2035 /*
2036 * Call the generic disklabel extraction routine.
2037 */
2038 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2039 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2040 if (errstring)
2041 raidmakedisklabel(rs);
2042 else {
2043 int i;
2044 struct partition *pp;
2045
2046 /*
2047 * Sanity check whether the found disklabel is valid.
2048 *
2049 * This is necessary since the total size of the raid device
2050 * may vary when the interleave is changed, even though exactly
2051 * the same components are used, and an old disklabel may be used
2052 * if one is found.
2053 */
2054 if (lp->d_secperunit != rs->sc_size)
2055 printf("raid%d: WARNING: %s: "
2056 "total sector size in disklabel (%d) != "
2057 "the size of raid (%ld)\n", unit, rs->sc_xname,
2058 lp->d_secperunit, (long) rs->sc_size);
2059 for (i = 0; i < lp->d_npartitions; i++) {
2060 pp = &lp->d_partitions[i];
2061 if (pp->p_offset + pp->p_size > rs->sc_size)
2062 printf("raid%d: WARNING: %s: end of partition `%c' "
2063 "exceeds the size of raid (%ld)\n",
2064 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2065 }
2066 }
2067
2068 }
2069 /*
2070 * Take care of things one might want to take care of in the event
2071 * that a disklabel isn't present.
2072 */
2073 static void
2074 raidmakedisklabel(rs)
2075 struct raid_softc *rs;
2076 {
2077 struct disklabel *lp = rs->sc_dkdev.dk_label;
2078 db1_printf(("Making a label..\n"));
2079
2080 /*
2081 * For historical reasons, if there's no disklabel present
2082 * the raw partition must be marked FS_BSDFFS.
2083 */
2084
2085 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2086
2087 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2088
2089 lp->d_checksum = dkcksum(lp);
2090 }
2091 /*
2092 * Lookup the provided name in the filesystem. If the file exists,
2093 * is a valid block device, and isn't being used by anyone else,
2094 * set *vpp to the file's vnode.
2095 * You'll find the original of this in ccd.c
2096 */
2097 int
2098 raidlookup(path, p, vpp)
2099 char *path;
2100 struct proc *p;
2101 struct vnode **vpp; /* result */
2102 {
2103 struct nameidata nd;
2104 struct vnode *vp;
2105 struct vattr va;
2106 int error;
2107
2108 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2109 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2110 #if 0
2111 printf("RAIDframe: vn_open returned %d\n", error);
2112 #endif
2113 return (error);
2114 }
2115 vp = nd.ni_vp;
2116 if (vp->v_usecount > 1) {
2117 VOP_UNLOCK(vp, 0);
2118 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2119 return (EBUSY);
2120 }
2121 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2122 VOP_UNLOCK(vp, 0);
2123 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2124 return (error);
2125 }
2126 /* XXX: eventually we should handle VREG, too. */
2127 if (va.va_type != VBLK) {
2128 VOP_UNLOCK(vp, 0);
2129 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2130 return (ENOTBLK);
2131 }
2132 VOP_UNLOCK(vp, 0);
2133 *vpp = vp;
2134 return (0);
2135 }
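#if 0
	/*
	 * Illustrative, non-compiled sketch of how a caller might use
	 * raidlookup() when opening a component.  The path name and the
	 * surrounding variables are purely hypothetical; "p" is the
	 * calling process.
	 */
	struct vnode *vp;
	int error;

	error = raidlookup("/dev/sd0e", p, &vp);
	if (error == 0) {
		/* vp is the component's vnode, opened FREAD|FWRITE and
		   unlocked; it is eventually released with vn_close(). */
	}
#endif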
2136 /*
2137 * Wait interruptibly for an exclusive lock.
2138 *
2139 * XXX
2140 * Several drivers do this; it should be abstracted and made MP-safe.
2141 * (Hmm... where have we seen this warning before :-> GO )
2142 */
2143 static int
2144 raidlock(rs)
2145 struct raid_softc *rs;
2146 {
2147 int error;
2148
2149 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2150 rs->sc_flags |= RAIDF_WANTED;
2151 if ((error =
2152 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2153 return (error);
2154 }
2155 rs->sc_flags |= RAIDF_LOCKED;
2156 return (0);
2157 }
2158 /*
2159 * Unlock and wake up any waiters.
2160 */
2161 static void
2162 raidunlock(rs)
2163 struct raid_softc *rs;
2164 {
2165
2166 rs->sc_flags &= ~RAIDF_LOCKED;
2167 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2168 rs->sc_flags &= ~RAIDF_WANTED;
2169 wakeup(rs);
2170 }
2171 }
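#if 0
	/*
	 * Illustrative, non-compiled sketch of the intended raidlock()/
	 * raidunlock() pairing around operations that must not run
	 * concurrently on a unit; "rs" is the unit's softc.
	 */
	if ((error = raidlock(rs)) != 0)
		return (error);
	/* ... open/close components, fiddle with rs->sc_flags, ... */
	raidunlock(rs);
#endif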
2172
2173
2174 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2175 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2176
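/*
 * Component labels live RF_COMPONENT_INFO_OFFSET bytes into each
 * component (block 32 for the usual DEV_BSIZE of 512) and occupy
 * RF_COMPONENT_INFO_SIZE bytes.  raidmarkclean() and raidmarkdirty()
 * below are read-modify-write helpers that update the mod counter and
 * the clean flag in a component's label.
 */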
2177 int
2178 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2179 {
2180 RF_ComponentLabel_t clabel;
2181 raidread_component_label(dev, b_vp, &clabel);
2182 clabel.mod_counter = mod_counter;
2183 clabel.clean = RF_RAID_CLEAN;
2184 raidwrite_component_label(dev, b_vp, &clabel);
2185 return(0);
2186 }
2187
2188
2189 int
2190 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2191 {
2192 RF_ComponentLabel_t clabel;
2193 raidread_component_label(dev, b_vp, &clabel);
2194 clabel.mod_counter = mod_counter;
2195 clabel.clean = RF_RAID_DIRTY;
2196 raidwrite_component_label(dev, b_vp, &clabel);
2197 return(0);
2198 }
2199
2200 /* ARGSUSED */
2201 int
2202 raidread_component_label(dev, b_vp, clabel)
2203 dev_t dev;
2204 struct vnode *b_vp;
2205 RF_ComponentLabel_t *clabel;
2206 {
2207 struct buf *bp;
2208 const struct bdevsw *bdev;
2209 int error;
2210
2211 /* XXX should probably ensure that we don't try to do this if
2212 someone has changed rf_protected_sectors. */
2213
2214 if (b_vp == NULL) {
2215 /* For whatever reason, this component is not valid.
2216 Don't try to read a component label from it. */
2217 return(EINVAL);
2218 }
2219
2220 /* get a block of the appropriate size... */
2221 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2222 bp->b_dev = dev;
2223
2224 /* get our ducks in a row for the read */
2225 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2226 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2227 bp->b_flags |= B_READ;
2228 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2229
2230 bdev = bdevsw_lookup(bp->b_dev);
2231 if (bdev == NULL)
2232 return (ENXIO);
2233 (*bdev->d_strategy)(bp);
2234
2235 error = biowait(bp);
2236
2237 if (!error) {
2238 memcpy(clabel, bp->b_data,
2239 sizeof(RF_ComponentLabel_t));
2240 #if 0
2241 rf_print_component_label( clabel );
2242 #endif
2243 } else {
2244 #if 0
2245 printf("Failed to read RAID component label!\n");
2246 #endif
2247 }
2248
2249 brelse(bp);
2250 return(error);
2251 }
2252 /* ARGSUSED */
2253 int
2254 raidwrite_component_label(dev, b_vp, clabel)
2255 dev_t dev;
2256 struct vnode *b_vp;
2257 RF_ComponentLabel_t *clabel;
2258 {
2259 struct buf *bp;
2260 const struct bdevsw *bdev;
2261 int error;
2262
2263 /* get a block of the appropriate size... */
2264 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2265 bp->b_dev = dev;
2266
2267 /* get our ducks in a row for the write */
2268 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2269 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2270 bp->b_flags |= B_WRITE;
2271 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2272
2273 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2274
2275 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2276
2277 bdev = bdevsw_lookup(bp->b_dev);
2278 if (bdev == NULL)
2279 return (ENXIO);
2280 (*bdev->d_strategy)(bp);
2281 error = biowait(bp);
2282 brelse(bp);
2283 if (error) {
2284 #if 1
2285 printf("Failed to write RAID component info!\n");
2286 #endif
2287 }
2288
2289 return(error);
2290 }
2291
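/*
 * Bump the mod counter and mark the component labels of the live
 * components dirty (components whose labels say they are spares are
 * left alone).
 */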
2292 void
2293 rf_markalldirty(raidPtr)
2294 RF_Raid_t *raidPtr;
2295 {
2296 RF_ComponentLabel_t clabel;
2297 int r,c;
2298
2299 raidPtr->mod_counter++;
2300 for (r = 0; r < raidPtr->numRow; r++) {
2301 for (c = 0; c < raidPtr->numCol; c++) {
2302 /* we don't want to touch (at all) a disk that has
2303 failed */
2304 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2305 raidread_component_label(
2306 raidPtr->Disks[r][c].dev,
2307 raidPtr->raid_cinfo[r][c].ci_vp,
2308 &clabel);
2309 if (clabel.status == rf_ds_spared) {
2310 /* XXX do something special...
2311 but whatever you do, don't
2312 try to access it!! */
2313 } else {
2314 #if 0
2315 clabel.status =
2316 raidPtr->Disks[r][c].status;
2317 raidwrite_component_label(
2318 raidPtr->Disks[r][c].dev,
2319 raidPtr->raid_cinfo[r][c].ci_vp,
2320 &clabel);
2321 #endif
2322 raidmarkdirty(
2323 raidPtr->Disks[r][c].dev,
2324 raidPtr->raid_cinfo[r][c].ci_vp,
2325 raidPtr->mod_counter);
2326 }
2327 }
2328 }
2329 }
2330 /* printf("Component labels marked dirty.\n"); */
2331 #if 0
2332 for( c = 0; c < raidPtr->numSpare ; c++) {
2333 sparecol = raidPtr->numCol + c;
2334 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2335 /*
2336
2337 XXX this is where we get fancy and map this spare
2338 into its correct spot in the array.
2339
2340 */
2341 /*
2342
2343 we claim this disk is "optimal" if it's
2344 rf_ds_used_spare, as that means it should be
2345 directly substitutable for the disk it replaced.
2346 We note that too...
2347
2348 */
2349
2350 for(i=0;i<raidPtr->numRow;i++) {
2351 for(j=0;j<raidPtr->numCol;j++) {
2352 if ((raidPtr->Disks[i][j].spareRow ==
2353 r) &&
2354 (raidPtr->Disks[i][j].spareCol ==
2355 sparecol)) {
2356 srow = r;
2357 scol = sparecol;
2358 break;
2359 }
2360 }
2361 }
2362
2363 raidread_component_label(
2364 raidPtr->Disks[r][sparecol].dev,
2365 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2366 &clabel);
2367 /* make sure status is noted */
2368 clabel.version = RF_COMPONENT_LABEL_VERSION;
2369 clabel.mod_counter = raidPtr->mod_counter;
2370 clabel.serial_number = raidPtr->serial_number;
2371 clabel.row = srow;
2372 clabel.column = scol;
2373 clabel.num_rows = raidPtr->numRow;
2374 clabel.num_columns = raidPtr->numCol;
2375 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2376 clabel.status = rf_ds_optimal;
2377 raidwrite_component_label(
2378 raidPtr->Disks[r][sparecol].dev,
2379 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2380 &clabel);
2381 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2382 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2383 }
2384 }
2385
2386 #endif
2387 }
2388
2389
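/*
 * Rewrite the component labels of all optimal components (and of any
 * in-use spares) with the current mod counter; on a final update with
 * clean parity, mark the labels clean as well.
 */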
2390 void
2391 rf_update_component_labels(raidPtr, final)
2392 RF_Raid_t *raidPtr;
2393 int final;
2394 {
2395 RF_ComponentLabel_t clabel;
2396 int sparecol;
2397 int r,c;
2398 int i,j;
2399 int srow, scol;
2400
2401 srow = -1;
2402 scol = -1;
2403
2404 /* XXX should do extra checks to make sure things really are clean,
2405 rather than blindly setting the clean bit... */
2406
2407 raidPtr->mod_counter++;
2408
2409 for (r = 0; r < raidPtr->numRow; r++) {
2410 for (c = 0; c < raidPtr->numCol; c++) {
2411 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2412 raidread_component_label(
2413 raidPtr->Disks[r][c].dev,
2414 raidPtr->raid_cinfo[r][c].ci_vp,
2415 &clabel);
2416 /* make sure status is noted */
2417 clabel.status = rf_ds_optimal;
2418 /* bump the counter */
2419 clabel.mod_counter = raidPtr->mod_counter;
2420
2421 raidwrite_component_label(
2422 raidPtr->Disks[r][c].dev,
2423 raidPtr->raid_cinfo[r][c].ci_vp,
2424 &clabel);
2425 if (final == RF_FINAL_COMPONENT_UPDATE) {
2426 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2427 raidmarkclean(
2428 raidPtr->Disks[r][c].dev,
2429 raidPtr->raid_cinfo[r][c].ci_vp,
2430 raidPtr->mod_counter);
2431 }
2432 }
2433 }
2434 /* else we don't touch it.. */
2435 }
2436 }
2437
2438 for( c = 0; c < raidPtr->numSpare ; c++) {
2439 sparecol = raidPtr->numCol + c;
2440 /* Need to ensure that the reconstruct actually completed! */
2441 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2442 /*
2443
2444 we claim this disk is "optimal" if it's
2445 rf_ds_used_spare, as that means it should be
2446 directly substitutable for the disk it replaced.
2447 We note that too...
2448
2449 */
2450
2451 for(i=0;i<raidPtr->numRow;i++) {
2452 for(j=0;j<raidPtr->numCol;j++) {
2453 if ((raidPtr->Disks[i][j].spareRow ==
2454 0) &&
2455 (raidPtr->Disks[i][j].spareCol ==
2456 sparecol)) {
2457 srow = i;
2458 scol = j;
2459 break;
2460 }
2461 }
2462 }
2463
2464 /* XXX shouldn't *really* need this... */
2465 raidread_component_label(
2466 raidPtr->Disks[0][sparecol].dev,
2467 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2468 &clabel);
2469 /* make sure status is noted */
2470
2471 raid_init_component_label(raidPtr, &clabel);
2472
2473 clabel.mod_counter = raidPtr->mod_counter;
2474 clabel.row = srow;
2475 clabel.column = scol;
2476 clabel.status = rf_ds_optimal;
2477
2478 raidwrite_component_label(
2479 raidPtr->Disks[0][sparecol].dev,
2480 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2481 &clabel);
2482 if (final == RF_FINAL_COMPONENT_UPDATE) {
2483 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2484 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2485 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2486 raidPtr->mod_counter);
2487 }
2488 }
2489 }
2490 }
2491 /* printf("Component labels updated\n"); */
2492 }
2493
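/*
 * Close a component's vnode.  Auto-configured components were opened
 * with VOP_OPEN() and are released with VOP_CLOSE()/vput(); everything
 * else came from raidlookup() and is released with vn_close().
 */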
2494 void
2495 rf_close_component(raidPtr, vp, auto_configured)
2496 RF_Raid_t *raidPtr;
2497 struct vnode *vp;
2498 int auto_configured;
2499 {
2500 struct proc *p;
2501
2502 p = raidPtr->engine_thread;
2503
2504 if (vp != NULL) {
2505 if (auto_configured == 1) {
2506 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2507 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2508 vput(vp);
2509
2510 } else {
2511 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2512 }
2513 } else {
2514 #if 0
2515 printf("vnode was NULL\n");
2516 #endif
2517 }
2518 }
2519
2520
2521 void
2522 rf_UnconfigureVnodes(raidPtr)
2523 RF_Raid_t *raidPtr;
2524 {
2525 int r,c;
2526 struct proc *p;
2527 struct vnode *vp;
2528 int acd;
2529
2530
2531 /* We take this opportunity to close the vnodes like we should.. */
2532
2533 p = raidPtr->engine_thread;
2534
2535 for (r = 0; r < raidPtr->numRow; r++) {
2536 for (c = 0; c < raidPtr->numCol; c++) {
2537 #if 0
2538 printf("raid%d: Closing vnode for row: %d col: %d\n",
2539 raidPtr->raidid, r, c);
2540 #endif
2541 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2542 acd = raidPtr->Disks[r][c].auto_configured;
2543 rf_close_component(raidPtr, vp, acd);
2544 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2545 raidPtr->Disks[r][c].auto_configured = 0;
2546 }
2547 }
2548 for (r = 0; r < raidPtr->numSpare; r++) {
2549 #if 0
2550 printf("raid%d: Closing vnode for spare: %d\n",
2551 raidPtr->raidid, r);
2552 #endif
2553 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2554 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2555 rf_close_component(raidPtr, vp, acd);
2556 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2557 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2558 }
2559 }
2560
2561
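/*
 * Thread bodies for the long-running operations: reconstruction,
 * parity rewrite, copyback, and in-place reconstruction.  Each one
 * sets the corresponding *_in_progress flag, does its work at
 * splbio(), clears the flag, and exits via kthread_exit().
 */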
2562 void
2563 rf_ReconThread(req)
2564 struct rf_recon_req *req;
2565 {
2566 int s;
2567 RF_Raid_t *raidPtr;
2568
2569 s = splbio();
2570 raidPtr = (RF_Raid_t *) req->raidPtr;
2571 raidPtr->recon_in_progress = 1;
2572
2573 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2574 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2575
2576 /* XXX get rid of this! we don't need it at all.. */
2577 RF_Free(req, sizeof(*req));
2578
2579 raidPtr->recon_in_progress = 0;
2580 splx(s);
2581
2582 /* That's all... */
2583 kthread_exit(0); /* does not return */
2584 }
2585
2586 void
2587 rf_RewriteParityThread(raidPtr)
2588 RF_Raid_t *raidPtr;
2589 {
2590 int retcode;
2591 int s;
2592
2593 raidPtr->parity_rewrite_in_progress = 1;
2594 s = splbio();
2595 retcode = rf_RewriteParity(raidPtr);
2596 splx(s);
2597 if (retcode) {
2598 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2599 } else {
2600 /* set the clean bit! If we shut down correctly,
2601 the clean bit on each component label will get
2602 set */
2603 raidPtr->parity_good = RF_RAID_CLEAN;
2604 }
2605 raidPtr->parity_rewrite_in_progress = 0;
2606
2607 /* Anyone waiting for us to stop? If so, inform them... */
2608 if (raidPtr->waitShutdown) {
2609 wakeup(&raidPtr->parity_rewrite_in_progress);
2610 }
2611
2612 /* That's all... */
2613 kthread_exit(0); /* does not return */
2614 }
2615
2616
2617 void
2618 rf_CopybackThread(raidPtr)
2619 RF_Raid_t *raidPtr;
2620 {
2621 int s;
2622
2623 raidPtr->copyback_in_progress = 1;
2624 s = splbio();
2625 rf_CopybackReconstructedData(raidPtr);
2626 splx(s);
2627 raidPtr->copyback_in_progress = 0;
2628
2629 /* That's all... */
2630 kthread_exit(0); /* does not return */
2631 }
2632
2633
2634 void
2635 rf_ReconstructInPlaceThread(req)
2636 struct rf_recon_req *req;
2637 {
2638 int retcode;
2639 int s;
2640 RF_Raid_t *raidPtr;
2641
2642 s = splbio();
2643 raidPtr = req->raidPtr;
2644 raidPtr->recon_in_progress = 1;
2645 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2646 RF_Free(req, sizeof(*req));
2647 raidPtr->recon_in_progress = 0;
2648 splx(s);
2649
2650 /* That's all... */
2651 kthread_exit(0); /* does not return */
2652 }
2653
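/*
 * Probe every disk in the system for RAID components: skip floppies
 * and CDs, read each disklabel, and for every FS_RAID partition try to
 * read a component label.  Partitions with a plausible label are
 * collected into the RF_AutoConfig_t list that is returned.
 */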
2654 RF_AutoConfig_t *
2655 rf_find_raid_components()
2656 {
2657 struct vnode *vp;
2658 struct disklabel label;
2659 struct device *dv;
2660 dev_t dev;
2661 int bmajor;
2662 int error;
2663 int i;
2664 int good_one;
2665 RF_ComponentLabel_t *clabel;
2666 RF_AutoConfig_t *ac_list;
2667 RF_AutoConfig_t *ac;
2668
2669
2670 /* initialize the AutoConfig list */
2671 ac_list = NULL;
2672
2673 /* we begin by trolling through *all* the devices on the system */
2674
2675 for (dv = alldevs.tqh_first; dv != NULL;
2676 dv = dv->dv_list.tqe_next) {
2677
2678 /* we are only interested in disks... */
2679 if (dv->dv_class != DV_DISK)
2680 continue;
2681
2682 /* we don't care about floppies... */
2683 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2684 continue;
2685 }
2686
2687 /* we don't care about CD's... */
2688 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2689 continue;
2690 }
2691
2692 /* hdfd is the Atari/Hades floppy driver */
2693 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2694 continue;
2695 }
2696 /* fdisa is the Atari/Milan floppy driver */
2697 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2698 continue;
2699 }
2700
2701 /* map the device name to its block device major number */
2702 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2703
2704 /* get a vnode for the raw partition of this disk */
2705
2706 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2707 if (bdevvp(dev, &vp))
2708 panic("RAID can't alloc vnode");
2709
2710 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2711
2712 if (error) {
2713 /* "Who cares." Continue looking
2714 for something that exists*/
2715 vput(vp);
2716 continue;
2717 }
2718
2719 /* Ok, the disk exists. Go get the disklabel. */
2720 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2721 FREAD, NOCRED, 0);
2722 if (error) {
2723 /*
2724 * XXX can't happen - open() would
2725 * have errored out (or faked up one)
2726 */
2727 printf("can't get label for dev %s%c (%d)!?!?\n",
2728 dv->dv_xname, 'a' + RAW_PART, error);
2729 }
2730
2731 /* don't need this any more. We'll allocate it again
2732 a little later if we really do... */
2733 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2734 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2735 vput(vp);
2736
2737 for (i=0; i < label.d_npartitions; i++) {
2738 /* We only support partitions marked as RAID */
2739 if (label.d_partitions[i].p_fstype != FS_RAID)
2740 continue;
2741
2742 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2743 if (bdevvp(dev, &vp))
2744 panic("RAID can't alloc vnode");
2745
2746 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2747 if (error) {
2748 /* Whatever... */
2749 vput(vp);
2750 continue;
2751 }
2752
2753 good_one = 0;
2754
2755 clabel = (RF_ComponentLabel_t *)
2756 malloc(sizeof(RF_ComponentLabel_t),
2757 M_RAIDFRAME, M_NOWAIT);
2758 if (clabel == NULL) {
2759 /* XXX CLEANUP HERE */
2760 printf("RAID auto config: out of memory!\n");
2761 return(NULL); /* XXX probably should panic? */
2762 }
2763
2764 if (!raidread_component_label(dev, vp, clabel)) {
2765 /* Got the label. Does it look reasonable? */
2766 if (rf_reasonable_label(clabel) &&
2767 (clabel->partitionSize <=
2768 label.d_partitions[i].p_size)) {
2769 #if DEBUG
2770 printf("Component on: %s%c: %d\n",
2771 dv->dv_xname, 'a'+i,
2772 label.d_partitions[i].p_size);
2773 rf_print_component_label(clabel);
2774 #endif
2775 /* if it's reasonable, add it,
2776 else ignore it. */
2777 ac = (RF_AutoConfig_t *)
2778 malloc(sizeof(RF_AutoConfig_t),
2779 M_RAIDFRAME,
2780 M_NOWAIT);
2781 if (ac == NULL) {
2782 /* XXX should panic?? */
2783 return(NULL);
2784 }
2785
2786 sprintf(ac->devname, "%s%c",
2787 dv->dv_xname, 'a'+i);
2788 ac->dev = dev;
2789 ac->vp = vp;
2790 ac->clabel = clabel;
2791 ac->next = ac_list;
2792 ac_list = ac;
2793 good_one = 1;
2794 }
2795 }
2796 if (!good_one) {
2797 /* cleanup */
2798 free(clabel, M_RAIDFRAME);
2799 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2800 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2801 vput(vp);
2802 }
2803 }
2804 }
2805 return(ac_list);
2806 }
2807
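/*
 * Basic sanity checks on a component label that was just read from
 * disk: known version, sane clean flag, and self-consistent geometry.
 */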
2808 static int
2809 rf_reasonable_label(clabel)
2810 RF_ComponentLabel_t *clabel;
2811 {
2812
2813 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2814 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2815 ((clabel->clean == RF_RAID_CLEAN) ||
2816 (clabel->clean == RF_RAID_DIRTY)) &&
2817 clabel->row >=0 &&
2818 clabel->column >= 0 &&
2819 clabel->num_rows > 0 &&
2820 clabel->num_columns > 0 &&
2821 clabel->row < clabel->num_rows &&
2822 clabel->column < clabel->num_columns &&
2823 clabel->blockSize > 0 &&
2824 clabel->numBlocks > 0) {
2825 /* label looks reasonable enough... */
2826 return(1);
2827 }
2828 return(0);
2829 }
2830
2831
2832 #if 0
2833 void
2834 rf_print_component_label(clabel)
2835 RF_ComponentLabel_t *clabel;
2836 {
2837 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2838 clabel->row, clabel->column,
2839 clabel->num_rows, clabel->num_columns);
2840 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2841 clabel->version, clabel->serial_number,
2842 clabel->mod_counter);
2843 printf(" Clean: %s Status: %d\n",
2844 clabel->clean ? "Yes" : "No", clabel->status );
2845 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2846 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2847 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2848 (char) clabel->parityConfig, clabel->blockSize,
2849 clabel->numBlocks);
2850 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2851 printf(" Contains root partition: %s\n",
2852 clabel->root_partition ? "Yes" : "No" );
2853 printf(" Last configured as: raid%d\n", clabel->last_unit );
2854 #if 0
2855 printf(" Config order: %d\n", clabel->config_order);
2856 #endif
2857
2858 }
2859 #endif
2860
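/*
 * Sort the discovered components into configuration sets: components
 * whose labels match (as judged by rf_does_it_fit()) end up on the
 * same RF_ConfigSet_t.
 */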
2861 RF_ConfigSet_t *
2862 rf_create_auto_sets(ac_list)
2863 RF_AutoConfig_t *ac_list;
2864 {
2865 RF_AutoConfig_t *ac;
2866 RF_ConfigSet_t *config_sets;
2867 RF_ConfigSet_t *cset;
2868 RF_AutoConfig_t *ac_next;
2869
2870
2871 config_sets = NULL;
2872
2873 /* Go through the AutoConfig list, and figure out which components
2874 belong to what sets. */
2875 ac = ac_list;
2876 while(ac!=NULL) {
2877 /* we're going to putz with ac->next, so save it here
2878 for use at the end of the loop */
2879 ac_next = ac->next;
2880
2881 if (config_sets == NULL) {
2882 /* will need at least this one... */
2883 config_sets = (RF_ConfigSet_t *)
2884 malloc(sizeof(RF_ConfigSet_t),
2885 M_RAIDFRAME, M_NOWAIT);
2886 if (config_sets == NULL) {
2887 panic("rf_create_auto_sets: No memory!\n");
2888 }
2889 /* this one is easy :) */
2890 config_sets->ac = ac;
2891 config_sets->next = NULL;
2892 config_sets->rootable = 0;
2893 ac->next = NULL;
2894 } else {
2895 /* which set does this component fit into? */
2896 cset = config_sets;
2897 while(cset!=NULL) {
2898 if (rf_does_it_fit(cset, ac)) {
2899 /* looks like it matches... */
2900 ac->next = cset->ac;
2901 cset->ac = ac;
2902 break;
2903 }
2904 cset = cset->next;
2905 }
2906 if (cset==NULL) {
2907 /* didn't find a match above... new set..*/
2908 cset = (RF_ConfigSet_t *)
2909 malloc(sizeof(RF_ConfigSet_t),
2910 M_RAIDFRAME, M_NOWAIT);
2911 if (cset == NULL) {
2912 panic("rf_create_auto_sets: No memory!\n");
2913 }
2914 cset->ac = ac;
2915 ac->next = NULL;
2916 cset->next = config_sets;
2917 cset->rootable = 0;
2918 config_sets = cset;
2919 }
2920 }
2921 ac = ac_next;
2922 }
2923
2924
2925 return(config_sets);
2926 }
2927
2928 static int
2929 rf_does_it_fit(cset, ac)
2930 RF_ConfigSet_t *cset;
2931 RF_AutoConfig_t *ac;
2932 {
2933 RF_ComponentLabel_t *clabel1, *clabel2;
2934
2935 /* If this one matches the *first* one in the set, that's good
2936 enough, since the other members of the set would have been
2937 through here too... */
2938 /* note that we are not checking partitionSize here..
2939
2940 Note that we are also not checking the mod_counters here.
2941 If everything else matches except the mod_counter, that's
2942 good enough for this test. We will deal with the mod_counters
2943 a little later in the autoconfiguration process.
2944
2945 (clabel1->mod_counter == clabel2->mod_counter) &&
2946
2947 The reason we don't check for this is that failed disks
2948 will have lower modification counts. If those disks are
2949 not added to the set they used to belong to, then they will
2950 form their own set, which may result in 2 different sets,
2951 for example, competing to be configured at raid0, and
2952 perhaps competing to be the root filesystem set. If the
2953 wrong ones get configured, or both attempt to become /,
2954 weird behaviour and/or serious lossage will occur. Thus we
2955 need to bring them into the fold here, and kick them out at
2956 a later point.
2957
2958 */
2959
2960 clabel1 = cset->ac->clabel;
2961 clabel2 = ac->clabel;
2962 if ((clabel1->version == clabel2->version) &&
2963 (clabel1->serial_number == clabel2->serial_number) &&
2964 (clabel1->num_rows == clabel2->num_rows) &&
2965 (clabel1->num_columns == clabel2->num_columns) &&
2966 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2967 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2968 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2969 (clabel1->parityConfig == clabel2->parityConfig) &&
2970 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2971 (clabel1->blockSize == clabel2->blockSize) &&
2972 (clabel1->numBlocks == clabel2->numBlocks) &&
2973 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2974 (clabel1->root_partition == clabel2->root_partition) &&
2975 (clabel1->last_unit == clabel2->last_unit) &&
2976 (clabel1->config_order == clabel2->config_order)) {
2977 /* if it gets here, it almost *has* to be a match */
2978 } else {
2979 /* it's not consistent with somebody in the set..
2980 punt */
2981 return(0);
2982 }
2983 /* all was fine.. it must fit... */
2984 return(1);
2985 }
2986
2987 int
2988 rf_have_enough_components(cset)
2989 RF_ConfigSet_t *cset;
2990 {
2991 RF_AutoConfig_t *ac;
2992 RF_AutoConfig_t *auto_config;
2993 RF_ComponentLabel_t *clabel;
2994 int r,c;
2995 int num_rows;
2996 int num_cols;
2997 int num_missing;
2998 int mod_counter;
2999 int mod_counter_found;
3000 int even_pair_failed;
3001 char parity_type;
3002
3003
3004 /* check to see that we have enough 'live' components
3005 of this set. If so, we can configure it if necessary */
3006
3007 num_rows = cset->ac->clabel->num_rows;
3008 num_cols = cset->ac->clabel->num_columns;
3009 parity_type = cset->ac->clabel->parityConfig;
3010
3011 /* XXX Check for duplicate components!?!?!? */
3012
3013 /* Determine what the mod_counter is supposed to be for this set. */
3014
3015 mod_counter_found = 0;
3016 mod_counter = 0;
3017 ac = cset->ac;
3018 while(ac!=NULL) {
3019 if (mod_counter_found==0) {
3020 mod_counter = ac->clabel->mod_counter;
3021 mod_counter_found = 1;
3022 } else {
3023 if (ac->clabel->mod_counter > mod_counter) {
3024 mod_counter = ac->clabel->mod_counter;
3025 }
3026 }
3027 ac = ac->next;
3028 }
3029
3030 num_missing = 0;
3031 auto_config = cset->ac;
3032
3033 for(r=0; r<num_rows; r++) {
3034 even_pair_failed = 0;
3035 for(c=0; c<num_cols; c++) {
3036 ac = auto_config;
3037 while(ac!=NULL) {
3038 if ((ac->clabel->row == r) &&
3039 (ac->clabel->column == c) &&
3040 (ac->clabel->mod_counter == mod_counter)) {
3041 /* it's this one... */
3042 #if DEBUG
3043 printf("Found: %s at %d,%d\n",
3044 ac->devname,r,c);
3045 #endif
3046 break;
3047 }
3048 ac=ac->next;
3049 }
3050 if (ac==NULL) {
3051 /* Didn't find one here! */
3052 /* special case for RAID 1, especially
3053 where there are more than 2
3054 components (where RAIDframe treats
3055 things a little differently :( ) */
3056 if (parity_type == '1') {
3057 if (c%2 == 0) { /* even component */
3058 even_pair_failed = 1;
3059 } else { /* odd component. If
3060 we're failed, and
3061 so is the even
3062 component, it's
3063 "Good Night, Charlie" */
3064 if (even_pair_failed == 1) {
3065 return(0);
3066 }
3067 }
3068 } else {
3069 /* normal accounting */
3070 num_missing++;
3071 }
3072 }
3073 if ((parity_type == '1') && (c%2 == 1)) {
3074 /* Just did the odd component of a pair, and we
3075 didn't bail.. reset the even_pair_failed flag,
3076 and go on to the next pair.... */
3077 even_pair_failed = 0;
3078 }
3079 }
3080 }
3081
3082 clabel = cset->ac->clabel;
3083
3084 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3085 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3086 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3087 /* XXX this needs to be made *much* more general */
3088 /* Too many failures */
3089 return(0);
3090 }
3091 /* otherwise, all is well, and we've got enough to take a kick
3092 at autoconfiguring this set */
3093 return(1);
3094 }
3095
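/*
 * Build an RF_Config_t for an autoconfig set.  The geometry and layout
 * parameters come straight from the component labels; the device names
 * are filled in from the RF_AutoConfig_t entries.
 */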
3096 void
3097 rf_create_configuration(ac,config,raidPtr)
3098 RF_AutoConfig_t *ac;
3099 RF_Config_t *config;
3100 RF_Raid_t *raidPtr;
3101 {
3102 RF_ComponentLabel_t *clabel;
3103 int i;
3104
3105 clabel = ac->clabel;
3106
3107 /* 1. Fill in the common stuff */
3108 config->numRow = clabel->num_rows;
3109 config->numCol = clabel->num_columns;
3110 config->numSpare = 0; /* XXX should this be set here? */
3111 config->sectPerSU = clabel->sectPerSU;
3112 config->SUsPerPU = clabel->SUsPerPU;
3113 config->SUsPerRU = clabel->SUsPerRU;
3114 config->parityConfig = clabel->parityConfig;
3115 /* XXX... */
3116 strcpy(config->diskQueueType,"fifo");
3117 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3118 config->layoutSpecificSize = 0; /* XXX ?? */
3119
3120 while(ac!=NULL) {
3121 /* row/col values will be in range due to the checks
3122 in rf_reasonable_label() */
3123 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3124 ac->devname);
3125 ac = ac->next;
3126 }
3127
3128 for(i=0;i<RF_MAXDBGV;i++) {
3129 config->debugVars[i][0] = '\0';
3130 }
3131 }
3132
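/*
 * rf_set_autoconfig() and rf_set_rootpartition() update the given flag
 * both in the in-core RF_Raid_t and in the on-disk component label of
 * every optimal component.
 */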
3133 int
3134 rf_set_autoconfig(raidPtr, new_value)
3135 RF_Raid_t *raidPtr;
3136 int new_value;
3137 {
3138 RF_ComponentLabel_t clabel;
3139 struct vnode *vp;
3140 dev_t dev;
3141 int row, column;
3142
3143 raidPtr->autoconfigure = new_value;
3144 for(row=0; row<raidPtr->numRow; row++) {
3145 for(column=0; column<raidPtr->numCol; column++) {
3146 if (raidPtr->Disks[row][column].status ==
3147 rf_ds_optimal) {
3148 dev = raidPtr->Disks[row][column].dev;
3149 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3150 raidread_component_label(dev, vp, &clabel);
3151 clabel.autoconfigure = new_value;
3152 raidwrite_component_label(dev, vp, &clabel);
3153 }
3154 }
3155 }
3156 return(new_value);
3157 }
3158
3159 int
3160 rf_set_rootpartition(raidPtr, new_value)
3161 RF_Raid_t *raidPtr;
3162 int new_value;
3163 {
3164 RF_ComponentLabel_t clabel;
3165 struct vnode *vp;
3166 dev_t dev;
3167 int row, column;
3168
3169 raidPtr->root_partition = new_value;
3170 for(row=0; row<raidPtr->numRow; row++) {
3171 for(column=0; column<raidPtr->numCol; column++) {
3172 if (raidPtr->Disks[row][column].status ==
3173 rf_ds_optimal) {
3174 dev = raidPtr->Disks[row][column].dev;
3175 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3176 raidread_component_label(dev, vp, &clabel);
3177 clabel.root_partition = new_value;
3178 raidwrite_component_label(dev, vp, &clabel);
3179 }
3180 }
3181 }
3182 return(new_value);
3183 }
3184
3185 void
3186 rf_release_all_vps(cset)
3187 RF_ConfigSet_t *cset;
3188 {
3189 RF_AutoConfig_t *ac;
3190
3191 ac = cset->ac;
3192 while(ac!=NULL) {
3193 /* Close the vp, and give it back */
3194 if (ac->vp) {
3195 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3196 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3197 vput(ac->vp);
3198 ac->vp = NULL;
3199 }
3200 ac = ac->next;
3201 }
3202 }
3203
3204
3205 void
3206 rf_cleanup_config_set(cset)
3207 RF_ConfigSet_t *cset;
3208 {
3209 RF_AutoConfig_t *ac;
3210 RF_AutoConfig_t *next_ac;
3211
3212 ac = cset->ac;
3213 while(ac!=NULL) {
3214 next_ac = ac->next;
3215 /* nuke the label */
3216 free(ac->clabel, M_RAIDFRAME);
3217 /* cleanup the config structure */
3218 free(ac, M_RAIDFRAME);
3219 /* "next.." */
3220 ac = next_ac;
3221 }
3222 /* and, finally, nuke the config set */
3223 free(cset, M_RAIDFRAME);
3224 }
3225
3226
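/*
 * Fill in the array-wide fields of a component label from the in-core
 * configuration; callers set the per-component fields (row, column)
 * themselves.
 */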
3227 void
3228 raid_init_component_label(raidPtr, clabel)
3229 RF_Raid_t *raidPtr;
3230 RF_ComponentLabel_t *clabel;
3231 {
3232 /* current version number */
3233 clabel->version = RF_COMPONENT_LABEL_VERSION;
3234 clabel->serial_number = raidPtr->serial_number;
3235 clabel->mod_counter = raidPtr->mod_counter;
3236 clabel->num_rows = raidPtr->numRow;
3237 clabel->num_columns = raidPtr->numCol;
3238 clabel->clean = RF_RAID_DIRTY; /* not clean */
3239 clabel->status = rf_ds_optimal; /* "It's good!" */
3240
3241 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3242 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3243 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3244
3245 clabel->blockSize = raidPtr->bytesPerSector;
3246 clabel->numBlocks = raidPtr->sectorsPerDisk;
3247
3248 /* XXX not portable */
3249 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3250 clabel->maxOutstanding = raidPtr->maxOutstanding;
3251 clabel->autoconfigure = raidPtr->autoconfigure;
3252 clabel->root_partition = raidPtr->root_partition;
3253 clabel->last_unit = raidPtr->raidid;
3254 clabel->config_order = raidPtr->config_order;
3255 }
3256
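/*
 * Configure one autoconfig set: build an RF_Config_t, pick a free RAID
 * unit (preferring the unit recorded in the labels), and run the normal
 * configuration path.  On success, *unit is set to the unit number.
 */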
3257 int
3258 rf_auto_config_set(cset,unit)
3259 RF_ConfigSet_t *cset;
3260 int *unit;
3261 {
3262 RF_Raid_t *raidPtr;
3263 RF_Config_t *config;
3264 int raidID;
3265 int retcode;
3266
3267 #if DEBUG
3268 printf("RAID autoconfigure\n");
3269 #endif
3270
3271 retcode = 0;
3272 *unit = -1;
3273
3274 /* 1. Create a config structure */
3275
3276 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3277 M_RAIDFRAME,
3278 M_NOWAIT);
3279 if (config==NULL) {
3280 printf("Out of mem!?!?\n");
3281 /* XXX do something more intelligent here. */
3282 return(1);
3283 }
3284
3285 memset(config, 0, sizeof(RF_Config_t));
3286
3287 /*
3288 2. Figure out what RAID ID this one is supposed to live at
3289 See if we can get the same RAID dev that it was configured
3290 on last time..
3291 */
3292
3293 raidID = cset->ac->clabel->last_unit;
3294 if ((raidID < 0) || (raidID >= numraid)) {
3295 /* let's not wander off into lala land. */
3296 raidID = numraid - 1;
3297 }
3298 if (raidPtrs[raidID]->valid != 0) {
3299
3300 /*
3301 Nope... Go looking for an alternative...
3302 Start high so we don't immediately grab raid0, even
3303 if it happens to be free.
3304 */
3305
3306 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3307 if (raidPtrs[raidID]->valid == 0) {
3308 /* can use this one! */
3309 break;
3310 }
3311 }
3312 }
3313
3314 if (raidID < 0) {
3315 /* punt... */
3316 printf("Unable to auto configure this set!\n");
3317 printf("(Out of RAID devs!)\n");
3318 return(1);
3319 }
3320
3321 #if DEBUG
3322 printf("Configuring raid%d:\n",raidID);
3323 #endif
3324
3325 raidPtr = raidPtrs[raidID];
3326
3327 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3328 raidPtr->raidid = raidID;
3329 raidPtr->openings = RAIDOUTSTANDING;
3330
3331 /* 3. Build the configuration structure */
3332 rf_create_configuration(cset->ac, config, raidPtr);
3333
3334 /* 4. Do the configuration */
3335 retcode = rf_Configure(raidPtr, config, cset->ac);
3336
3337 if (retcode == 0) {
3338
3339 raidinit(raidPtrs[raidID]);
3340
3341 rf_markalldirty(raidPtrs[raidID]);
3342 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3343 if (cset->ac->clabel->root_partition==1) {
3344 /* everything configured just fine. Make a note
3345 that this set is eligible to be root. */
3346 cset->rootable = 1;
3347 /* XXX do this here? */
3348 raidPtrs[raidID]->root_partition = 1;
3349 }
3350 }
3351
3352 /* 5. Cleanup */
3353 free(config, M_RAIDFRAME);
3354
3355 *unit = raidID;
3356 return(retcode);
3357 }
3358
3359 void
3360 rf_disk_unbusy(desc)
3361 RF_RaidAccessDesc_t *desc;
3362 {
3363 struct buf *bp;
3364
3365 bp = (struct buf *)desc->bp;
3366 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3367 (bp->b_bcount - bp->b_resid));
3368 }
3369