rf_netbsdkintf.c revision 1.167 1 /* $NetBSD: rf_netbsdkintf.c,v 1.167 2003/12/29 03:33:48 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.167 2003/12/29 03:33:48 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
189 #ifdef DEBUG
190 int rf_kdebug_level = 0;
191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
192 #else /* DEBUG */
193 #define db1_printf(a) { }
194 #endif /* DEBUG */
195
196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
197
198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
199
200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
201 * spare table */
202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
203 * installation process */
204
205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
207 /* prototypes */
208 static void KernelWakeupFunc(struct buf * bp);
209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
210 dev_t dev, RF_SectorNum_t startSect,
211 RF_SectorCount_t numSect, caddr_t buf,
212 void (*cbFunc) (struct buf *), void *cbArg,
213 int logBytesPerSector, struct proc * b_proc);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /*
238 * Pilfered from ccd.c
239 */
240
/*
 * Per-component I/O descriptor (allocated from raidframe_cbufpool):
 * wraps the buf handed to a component device so the completion path
 * can recover both the original buf and the RAIDframe queue entry.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
246
247 /* component buffer pool */
248 struct pool raidframe_cbufpool;
249
250 /* XXX Not sure if the following should be replacing the raidPtrs above,
251 or if it should be used in conjunction with that...
252 */
253
/*
 * Per-unit driver state; one entry per unit in the raid_softc[]
 * array allocated by raidattach().
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
262 /* sc_flags */
263 #define RAIDF_INITED 0x01 /* unit has been initialized */
264 #define RAIDF_WLABEL 0x02 /* label area is writable */
265 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
266 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
267 #define RAIDF_LOCKED 0x80 /* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 int numraid = 0;
271
272 /*
273 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
274 * Be aware that large numbers can allow the driver to consume a lot of
275 * kernel memory, especially on writes, and in degraded mode reads.
276 *
277 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
278 * a single 64K write will typically require 64K for the old data,
279 * 64K for the old parity, and 64K for the new parity, for a total
280 * of 192K (if the parity buffer is not re-used immediately).
281 * Even it if is used immediately, that's still 128K, which when multiplied
282 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
283 *
284 * Now in degraded mode, for example, a 64K read on the above setup may
285 * require data reconstruction, which will require *all* of the 4 remaining
286 * disks to participate -- 4 * 32K/disk == 128K again.
287 */
288
289 #ifndef RAIDOUTSTANDING
290 #define RAIDOUTSTANDING 6
291 #endif
292
293 #define RAIDLABELDEV(dev) \
294 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
295
296 /* declared here, and made public, for the benefit of KVM stuff.. */
297 struct raid_softc *raid_softc;
298
299 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
300 struct disklabel *);
301 static void raidgetdisklabel(dev_t);
302 static void raidmakedisklabel(struct raid_softc *);
303
304 static int raidlock(struct raid_softc *);
305 static void raidunlock(struct raid_softc *);
306
307 static void rf_markalldirty(RF_Raid_t *);
308
309 struct device *raidrootdev;
310
311 void rf_ReconThread(struct rf_recon_req *);
312 /* XXX what I want is: */
313 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
314 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
315 void rf_CopybackThread(RF_Raid_t *raidPtr);
316 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
317 int rf_autoconfig(struct device *self);
318 void rf_buildroothack(RF_ConfigSet_t *);
319
320 RF_AutoConfig_t *rf_find_raid_components(void);
321 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
322 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
323 static int rf_reasonable_label(RF_ComponentLabel_t *);
324 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
325 int rf_set_autoconfig(RF_Raid_t *, int);
326 int rf_set_rootpartition(RF_Raid_t *, int);
327 void rf_release_all_vps(RF_ConfigSet_t *);
328 void rf_cleanup_config_set(RF_ConfigSet_t *);
329 int rf_have_enough_components(RF_ConfigSet_t *);
330 int rf_auto_config_set(RF_ConfigSet_t *, int *);
331
332 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
333 allow autoconfig to take place.
334 Note that this is overridden by having
335 RAID_AUTOCONFIG as an option in the
336 kernel config file. */
337
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of RAID units from the kernel config.  Allocates all
 * global driver state (raidPtrs[], raid_softc[], raidrootdev[]),
 * boots the RAIDframe core, and registers the autoconfiguration
 * finalizer to run after real hardware has been found.
 */
void
raidattach(num)
	int num;
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
	    M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	/* Fake "struct device"s, one per unit; rf_buildroothack() hands
	 * one of these to the boot code as booted_device when a RAID set
	 * is the root device. */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Trim numraid down to the units that actually got
			 * a descriptor, and give up on the rest. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
434
435 int
436 rf_autoconfig(struct device *self)
437 {
438 RF_AutoConfig_t *ac_list;
439 RF_ConfigSet_t *config_sets;
440
441 if (raidautoconfig == 0)
442 return (0);
443
444 /* XXX This code can only be run once. */
445 raidautoconfig = 0;
446
447 /* 1. locate all RAID components on the system */
448 #ifdef DEBUG
449 printf("Searching for RAID components...\n");
450 #endif
451 ac_list = rf_find_raid_components();
452
453 /* 2. Sort them into their respective sets. */
454 config_sets = rf_create_auto_sets(ac_list);
455
456 /*
457 * 3. Evaluate each set andconfigure the valid ones.
458 * This gets done in rf_buildroothack().
459 */
460 rf_buildroothack(config_sets);
461
462 return (1);
463 }
464
465 void
466 rf_buildroothack(RF_ConfigSet_t *config_sets)
467 {
468 RF_ConfigSet_t *cset;
469 RF_ConfigSet_t *next_cset;
470 int retcode;
471 int raidID;
472 int rootID;
473 int num_root;
474
475 rootID = 0;
476 num_root = 0;
477 cset = config_sets;
478 while(cset != NULL ) {
479 next_cset = cset->next;
480 if (rf_have_enough_components(cset) &&
481 cset->ac->clabel->autoconfigure==1) {
482 retcode = rf_auto_config_set(cset,&raidID);
483 if (!retcode) {
484 if (cset->rootable) {
485 rootID = raidID;
486 num_root++;
487 }
488 } else {
489 /* The autoconfig didn't work :( */
490 #if DEBUG
491 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
492 #endif
493 rf_release_all_vps(cset);
494 }
495 } else {
496 /* we're not autoconfiguring this set...
497 release the associated resources */
498 rf_release_all_vps(cset);
499 }
500 /* cleanup */
501 rf_cleanup_config_set(cset);
502 cset = next_cset;
503 }
504
505 /* we found something bootable... */
506
507 if (num_root == 1) {
508 booted_device = &raidrootdev[rootID];
509 } else if (num_root > 1) {
510 /* we can't guess.. require the user to answer... */
511 boothowto |= RB_ASKNAME;
512 }
513 }
514
515
516 int
517 raidsize(dev)
518 dev_t dev;
519 {
520 struct raid_softc *rs;
521 struct disklabel *lp;
522 int part, unit, omask, size;
523
524 unit = raidunit(dev);
525 if (unit >= numraid)
526 return (-1);
527 rs = &raid_softc[unit];
528
529 if ((rs->sc_flags & RAIDF_INITED) == 0)
530 return (-1);
531
532 part = DISKPART(dev);
533 omask = rs->sc_dkdev.dk_openmask & (1 << part);
534 lp = rs->sc_dkdev.dk_label;
535
536 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
537 return (-1);
538
539 if (lp->d_partitions[part].p_fstype != FS_SWAP)
540 size = -1;
541 else
542 size = lp->d_partitions[part].p_size *
543 (lp->d_secsize / DEV_BSIZE);
544
545 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
546 return (-1);
547
548 return (size);
549
550 }
551
552 int
553 raiddump(dev, blkno, va, size)
554 dev_t dev;
555 daddr_t blkno;
556 caddr_t va;
557 size_t size;
558 {
559 /* Not implemented. */
560 return ENXIO;
561 }
/* ARGSUSED */
/*
 * raidopen: open entry point (block and character).  Takes the unit
 * lock, re-reads the disklabel on the first open of a configured set,
 * validates the requested partition, records the open in the disk
 * openmasks, and marks components dirty on first open.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First open of a configured set: (re)read the label so the
	   partition checks below see current data. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close entry point.  Clears this partition from the open
 * masks; on the last close of a configured set, pushes final component
 * labels out, and if the system is shutting down, also shuts the RAID
 * set itself down and detaches the disk.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	/* NOTE(review): 'error' may hold an rf_Shutdown() failure at this
	   point, but close always reports success -- presumably deliberate
	   so close(2) never fails during shutdown; confirm. */
	return (0);

}
695
696 void
697 raidstrategy(bp)
698 struct buf *bp;
699 {
700 int s;
701
702 unsigned int raidID = raidunit(bp->b_dev);
703 RF_Raid_t *raidPtr;
704 struct raid_softc *rs = &raid_softc[raidID];
705 int wlabel;
706
707 if ((rs->sc_flags & RAIDF_INITED) ==0) {
708 bp->b_error = ENXIO;
709 bp->b_flags |= B_ERROR;
710 bp->b_resid = bp->b_bcount;
711 biodone(bp);
712 return;
713 }
714 if (raidID >= numraid || !raidPtrs[raidID]) {
715 bp->b_error = ENODEV;
716 bp->b_flags |= B_ERROR;
717 bp->b_resid = bp->b_bcount;
718 biodone(bp);
719 return;
720 }
721 raidPtr = raidPtrs[raidID];
722 if (!raidPtr->valid) {
723 bp->b_error = ENODEV;
724 bp->b_flags |= B_ERROR;
725 bp->b_resid = bp->b_bcount;
726 biodone(bp);
727 return;
728 }
729 if (bp->b_bcount == 0) {
730 db1_printf(("b_bcount is zero..\n"));
731 biodone(bp);
732 return;
733 }
734
735 /*
736 * Do bounds checking and adjust transfer. If there's an
737 * error, the bounds check will flag that for us.
738 */
739
740 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
741 if (DISKPART(bp->b_dev) != RAW_PART)
742 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
743 db1_printf(("Bounds check failed!!:%d %d\n",
744 (int) bp->b_blkno, (int) wlabel));
745 biodone(bp);
746 return;
747 }
748 s = splbio();
749
750 bp->b_resid = 0;
751
752 /* stuff it onto our queue */
753 BUFQ_PUT(&rs->buf_queue, bp);
754
755 raidstart(raidPtrs[raidID]);
756
757 splx(s);
758 }
759 /* ARGSUSED */
760 int
761 raidread(dev, uio, flags)
762 dev_t dev;
763 struct uio *uio;
764 int flags;
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775
776 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
777
778 }
779 /* ARGSUSED */
780 int
781 raidwrite(dev, uio, flags)
782 dev_t dev;
783 struct uio *uio;
784 int flags;
785 {
786 int unit = raidunit(dev);
787 struct raid_softc *rs;
788
789 if (unit >= numraid)
790 return (ENXIO);
791 rs = &raid_softc[unit];
792
793 if ((rs->sc_flags & RAIDF_INITED) == 0)
794 return (ENXIO);
795
796 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
797
798 }
799
800 int
801 raidioctl(dev, cmd, data, flag, p)
802 dev_t dev;
803 u_long cmd;
804 caddr_t data;
805 int flag;
806 struct proc *p;
807 {
808 int unit = raidunit(dev);
809 int error = 0;
810 int part, pmask;
811 struct raid_softc *rs;
812 RF_Config_t *k_cfg, *u_cfg;
813 RF_Raid_t *raidPtr;
814 RF_RaidDisk_t *diskPtr;
815 RF_AccTotals_t *totals;
816 RF_DeviceConfig_t *d_cfg, **ucfgp;
817 u_char *specific_buf;
818 int retcode = 0;
819 int column;
820 int raidid;
821 struct rf_recon_req *rrcopy, *rr;
822 RF_ComponentLabel_t *clabel;
823 RF_ComponentLabel_t ci_label;
824 RF_ComponentLabel_t **clabel_ptr;
825 RF_SingleComponent_t *sparePtr,*componentPtr;
826 RF_SingleComponent_t hot_spare;
827 RF_SingleComponent_t component;
828 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
829 int i, j, d;
830 #ifdef __HAVE_OLD_DISKLABEL
831 struct disklabel newlabel;
832 #endif
833
834 if (unit >= numraid)
835 return (ENXIO);
836 rs = &raid_softc[unit];
837 raidPtr = raidPtrs[unit];
838
839 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
840 (int) DISKPART(dev), (int) unit, (int) cmd));
841
842 /* Must be open for writes for these commands... */
843 switch (cmd) {
844 case DIOCSDINFO:
845 case DIOCWDINFO:
846 #ifdef __HAVE_OLD_DISKLABEL
847 case ODIOCWDINFO:
848 case ODIOCSDINFO:
849 #endif
850 case DIOCWLABEL:
851 if ((flag & FWRITE) == 0)
852 return (EBADF);
853 }
854
855 /* Must be initialized for these... */
856 switch (cmd) {
857 case DIOCGDINFO:
858 case DIOCSDINFO:
859 case DIOCWDINFO:
860 #ifdef __HAVE_OLD_DISKLABEL
861 case ODIOCGDINFO:
862 case ODIOCWDINFO:
863 case ODIOCSDINFO:
864 case ODIOCGDEFLABEL:
865 #endif
866 case DIOCGPART:
867 case DIOCWLABEL:
868 case DIOCGDEFLABEL:
869 case RAIDFRAME_SHUTDOWN:
870 case RAIDFRAME_REWRITEPARITY:
871 case RAIDFRAME_GET_INFO:
872 case RAIDFRAME_RESET_ACCTOTALS:
873 case RAIDFRAME_GET_ACCTOTALS:
874 case RAIDFRAME_KEEP_ACCTOTALS:
875 case RAIDFRAME_GET_SIZE:
876 case RAIDFRAME_FAIL_DISK:
877 case RAIDFRAME_COPYBACK:
878 case RAIDFRAME_CHECK_RECON_STATUS:
879 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
880 case RAIDFRAME_GET_COMPONENT_LABEL:
881 case RAIDFRAME_SET_COMPONENT_LABEL:
882 case RAIDFRAME_ADD_HOT_SPARE:
883 case RAIDFRAME_REMOVE_HOT_SPARE:
884 case RAIDFRAME_INIT_LABELS:
885 case RAIDFRAME_REBUILD_IN_PLACE:
886 case RAIDFRAME_CHECK_PARITY:
887 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
888 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
889 case RAIDFRAME_CHECK_COPYBACK_STATUS:
890 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
891 case RAIDFRAME_SET_AUTOCONFIG:
892 case RAIDFRAME_SET_ROOT:
893 case RAIDFRAME_DELETE_COMPONENT:
894 case RAIDFRAME_INCORPORATE_HOT_SPARE:
895 if ((rs->sc_flags & RAIDF_INITED) == 0)
896 return (ENXIO);
897 }
898
899 switch (cmd) {
900
901 /* configure the system */
902 case RAIDFRAME_CONFIGURE:
903
904 if (raidPtr->valid) {
905 /* There is a valid RAID set running on this unit! */
906 printf("raid%d: Device already configured!\n",unit);
907 return(EINVAL);
908 }
909
910 /* copy-in the configuration information */
911 /* data points to a pointer to the configuration structure */
912
913 u_cfg = *((RF_Config_t **) data);
914 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
915 if (k_cfg == NULL) {
916 return (ENOMEM);
917 }
918 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
919 if (retcode) {
920 RF_Free(k_cfg, sizeof(RF_Config_t));
921 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
922 retcode));
923 return (retcode);
924 }
925 /* allocate a buffer for the layout-specific data, and copy it
926 * in */
927 if (k_cfg->layoutSpecificSize) {
928 if (k_cfg->layoutSpecificSize > 10000) {
929 /* sanity check */
930 RF_Free(k_cfg, sizeof(RF_Config_t));
931 return (EINVAL);
932 }
933 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
934 (u_char *));
935 if (specific_buf == NULL) {
936 RF_Free(k_cfg, sizeof(RF_Config_t));
937 return (ENOMEM);
938 }
939 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
940 k_cfg->layoutSpecificSize);
941 if (retcode) {
942 RF_Free(k_cfg, sizeof(RF_Config_t));
943 RF_Free(specific_buf,
944 k_cfg->layoutSpecificSize);
945 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
946 retcode));
947 return (retcode);
948 }
949 } else
950 specific_buf = NULL;
951 k_cfg->layoutSpecific = specific_buf;
952
953 /* should do some kind of sanity check on the configuration.
954 * Store the sum of all the bytes in the last byte? */
955
956 /* configure the system */
957
958 /*
959 * Clear the entire RAID descriptor, just to make sure
960 * there is no stale data left in the case of a
961 * reconfiguration
962 */
963 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
964 raidPtr->raidid = unit;
965
966 retcode = rf_Configure(raidPtr, k_cfg, NULL);
967
968 if (retcode == 0) {
969
970 /* allow this many simultaneous IO's to
971 this RAID device */
972 raidPtr->openings = RAIDOUTSTANDING;
973
974 raidinit(raidPtr);
975 rf_markalldirty(raidPtr);
976 }
977 /* free the buffers. No return code here. */
978 if (k_cfg->layoutSpecificSize) {
979 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
980 }
981 RF_Free(k_cfg, sizeof(RF_Config_t));
982
983 return (retcode);
984
985 /* shutdown the system */
986 case RAIDFRAME_SHUTDOWN:
987
988 if ((error = raidlock(rs)) != 0)
989 return (error);
990
991 /*
992 * If somebody has a partition mounted, we shouldn't
993 * shutdown.
994 */
995
996 part = DISKPART(dev);
997 pmask = (1 << part);
998 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
999 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1000 (rs->sc_dkdev.dk_copenmask & pmask))) {
1001 raidunlock(rs);
1002 return (EBUSY);
1003 }
1004
1005 retcode = rf_Shutdown(raidPtr);
1006
1007 /* It's no longer initialized... */
1008 rs->sc_flags &= ~RAIDF_INITED;
1009
1010 /* Detach the disk. */
1011 disk_detach(&rs->sc_dkdev);
1012
1013 raidunlock(rs);
1014
1015 return (retcode);
1016 case RAIDFRAME_GET_COMPONENT_LABEL:
1017 clabel_ptr = (RF_ComponentLabel_t **) data;
1018 /* need to read the component label for the disk indicated
1019 by row,column in clabel */
1020
1021 /* For practice, let's get it directly fromdisk, rather
1022 than from the in-core copy */
1023 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1024 (RF_ComponentLabel_t *));
1025 if (clabel == NULL)
1026 return (ENOMEM);
1027
1028 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1029
1030 retcode = copyin( *clabel_ptr, clabel,
1031 sizeof(RF_ComponentLabel_t));
1032
1033 if (retcode) {
1034 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1035 return(retcode);
1036 }
1037
1038 clabel->row = 0; /* Don't allow looking at anything else.*/
1039
1040 column = clabel->column;
1041
1042 if ((column < 0) || (column >= raidPtr->numCol +
1043 raidPtr->numSpare)) {
1044 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1045 return(EINVAL);
1046 }
1047
1048 raidread_component_label(raidPtr->Disks[column].dev,
1049 raidPtr->raid_cinfo[column].ci_vp,
1050 clabel );
1051
1052 retcode = copyout(clabel, *clabel_ptr,
1053 sizeof(RF_ComponentLabel_t));
1054 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1055 return (retcode);
1056
1057 case RAIDFRAME_SET_COMPONENT_LABEL:
1058 clabel = (RF_ComponentLabel_t *) data;
1059
1060 /* XXX check the label for valid stuff... */
1061 /* Note that some things *should not* get modified --
1062 the user should be re-initing the labels instead of
1063 trying to patch things.
1064 */
1065
1066 raidid = raidPtr->raidid;
1067 printf("raid%d: Got component label:\n", raidid);
1068 printf("raid%d: Version: %d\n", raidid, clabel->version);
1069 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1070 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1071 printf("raid%d: Column: %d\n", raidid, clabel->column);
1072 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1073 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1074 printf("raid%d: Status: %d\n", raidid, clabel->status);
1075
1076 clabel->row = 0;
1077 column = clabel->column;
1078
1079 if ((column < 0) || (column >= raidPtr->numCol)) {
1080 return(EINVAL);
1081 }
1082
1083 /* XXX this isn't allowed to do anything for now :-) */
1084
1085 /* XXX and before it is, we need to fill in the rest
1086 of the fields!?!?!?! */
1087 #if 0
1088 raidwrite_component_label(
1089 raidPtr->Disks[column].dev,
1090 raidPtr->raid_cinfo[column].ci_vp,
1091 clabel );
1092 #endif
1093 return (0);
1094
1095 case RAIDFRAME_INIT_LABELS:
1096 clabel = (RF_ComponentLabel_t *) data;
1097 /*
1098 we only want the serial number from
1099 the above. We get all the rest of the information
1100 from the config that was used to create this RAID
1101 set.
1102 */
1103
1104 raidPtr->serial_number = clabel->serial_number;
1105
1106 raid_init_component_label(raidPtr, &ci_label);
1107 ci_label.serial_number = clabel->serial_number;
1108 ci_label.row = 0; /* we dont' pretend to support more */
1109
1110 for(column=0;column<raidPtr->numCol;column++) {
1111 diskPtr = &raidPtr->Disks[column];
1112 if (!RF_DEAD_DISK(diskPtr->status)) {
1113 ci_label.partitionSize = diskPtr->partitionSize;
1114 ci_label.column = column;
1115 raidwrite_component_label(
1116 raidPtr->Disks[column].dev,
1117 raidPtr->raid_cinfo[column].ci_vp,
1118 &ci_label );
1119 }
1120 }
1121
1122 return (retcode);
1123 case RAIDFRAME_SET_AUTOCONFIG:
1124 d = rf_set_autoconfig(raidPtr, *(int *) data);
1125 printf("raid%d: New autoconfig value is: %d\n",
1126 raidPtr->raidid, d);
1127 *(int *) data = d;
1128 return (retcode);
1129
1130 case RAIDFRAME_SET_ROOT:
1131 d = rf_set_rootpartition(raidPtr, *(int *) data);
1132 printf("raid%d: New rootpartition value is: %d\n",
1133 raidPtr->raidid, d);
1134 *(int *) data = d;
1135 return (retcode);
1136
1137 /* initialize all parity */
1138 case RAIDFRAME_REWRITEPARITY:
1139
1140 if (raidPtr->Layout.map->faultsTolerated == 0) {
1141 /* Parity for RAID 0 is trivially correct */
1142 raidPtr->parity_good = RF_RAID_CLEAN;
1143 return(0);
1144 }
1145
1146 if (raidPtr->parity_rewrite_in_progress == 1) {
1147 /* Re-write is already in progress! */
1148 return(EINVAL);
1149 }
1150
1151 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1152 rf_RewriteParityThread,
1153 raidPtr,"raid_parity");
1154 return (retcode);
1155
1156
1157 case RAIDFRAME_ADD_HOT_SPARE:
1158 sparePtr = (RF_SingleComponent_t *) data;
1159 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1160 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1161 return(retcode);
1162
1163 case RAIDFRAME_REMOVE_HOT_SPARE:
1164 return(retcode);
1165
1166 case RAIDFRAME_DELETE_COMPONENT:
1167 componentPtr = (RF_SingleComponent_t *)data;
1168 memcpy( &component, componentPtr,
1169 sizeof(RF_SingleComponent_t));
1170 retcode = rf_delete_component(raidPtr, &component);
1171 return(retcode);
1172
1173 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1174 componentPtr = (RF_SingleComponent_t *)data;
1175 memcpy( &component, componentPtr,
1176 sizeof(RF_SingleComponent_t));
1177 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1178 return(retcode);
1179
1180 case RAIDFRAME_REBUILD_IN_PLACE:
1181
1182 if (raidPtr->Layout.map->faultsTolerated == 0) {
1183 /* Can't do this on a RAID 0!! */
1184 return(EINVAL);
1185 }
1186
1187 if (raidPtr->recon_in_progress == 1) {
1188 /* a reconstruct is already in progress! */
1189 return(EINVAL);
1190 }
1191
1192 componentPtr = (RF_SingleComponent_t *) data;
1193 memcpy( &component, componentPtr,
1194 sizeof(RF_SingleComponent_t));
1195 component.row = 0; /* we don't support any more */
1196 column = component.column;
1197
1198 if ((column < 0) || (column >= raidPtr->numCol)) {
1199 return(EINVAL);
1200 }
1201
1202 RF_LOCK_MUTEX(raidPtr->mutex);
1203 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1204 (raidPtr->numFailures > 0)) {
1205 /* XXX 0 above shouldn't be constant!!! */
1206 /* some component other than this has failed.
1207 Let's not make things worse than they already
1208 are... */
1209 printf("raid%d: Unable to reconstruct to disk at:\n",
1210 raidPtr->raidid);
1211 printf("raid%d: Col: %d Too many failures.\n",
1212 raidPtr->raidid, column);
1213 RF_UNLOCK_MUTEX(raidPtr->mutex);
1214 return (EINVAL);
1215 }
1216 if (raidPtr->Disks[column].status ==
1217 rf_ds_reconstructing) {
1218 printf("raid%d: Unable to reconstruct to disk at:\n",
1219 raidPtr->raidid);
1220 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1221
1222 RF_UNLOCK_MUTEX(raidPtr->mutex);
1223 return (EINVAL);
1224 }
1225 if (raidPtr->Disks[column].status == rf_ds_spared) {
1226 RF_UNLOCK_MUTEX(raidPtr->mutex);
1227 return (EINVAL);
1228 }
1229 RF_UNLOCK_MUTEX(raidPtr->mutex);
1230
1231 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1232 if (rrcopy == NULL)
1233 return(ENOMEM);
1234
1235 rrcopy->raidPtr = (void *) raidPtr;
1236 rrcopy->col = column;
1237
1238 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1239 rf_ReconstructInPlaceThread,
1240 rrcopy,"raid_reconip");
1241 return(retcode);
1242
1243 case RAIDFRAME_GET_INFO:
1244 if (!raidPtr->valid)
1245 return (ENODEV);
1246 ucfgp = (RF_DeviceConfig_t **) data;
1247 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1248 (RF_DeviceConfig_t *));
1249 if (d_cfg == NULL)
1250 return (ENOMEM);
1251 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1252 d_cfg->rows = 1; /* there is only 1 row now */
1253 d_cfg->cols = raidPtr->numCol;
1254 d_cfg->ndevs = raidPtr->numCol;
1255 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1256 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1257 return (ENOMEM);
1258 }
1259 d_cfg->nspares = raidPtr->numSpare;
1260 if (d_cfg->nspares >= RF_MAX_DISKS) {
1261 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1262 return (ENOMEM);
1263 }
1264 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1265 d = 0;
1266 for (j = 0; j < d_cfg->cols; j++) {
1267 d_cfg->devs[d] = raidPtr->Disks[j];
1268 d++;
1269 }
1270 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1271 d_cfg->spares[i] = raidPtr->Disks[j];
1272 }
1273 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1274 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1275
1276 return (retcode);
1277
1278 case RAIDFRAME_CHECK_PARITY:
1279 *(int *) data = raidPtr->parity_good;
1280 return (0);
1281
1282 case RAIDFRAME_RESET_ACCTOTALS:
1283 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1284 return (0);
1285
1286 case RAIDFRAME_GET_ACCTOTALS:
1287 totals = (RF_AccTotals_t *) data;
1288 *totals = raidPtr->acc_totals;
1289 return (0);
1290
1291 case RAIDFRAME_KEEP_ACCTOTALS:
1292 raidPtr->keep_acc_totals = *(int *)data;
1293 return (0);
1294
1295 case RAIDFRAME_GET_SIZE:
1296 *(int *) data = raidPtr->totalSectors;
1297 return (0);
1298
1299 /* fail a disk & optionally start reconstruction */
1300 case RAIDFRAME_FAIL_DISK:
1301
1302 if (raidPtr->Layout.map->faultsTolerated == 0) {
1303 /* Can't do this on a RAID 0!! */
1304 return(EINVAL);
1305 }
1306
1307 rr = (struct rf_recon_req *) data;
1308 rr->row = 0;
1309 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1310 return (EINVAL);
1311
1312
1313 RF_LOCK_MUTEX(raidPtr->mutex);
1314 if ((raidPtr->Disks[rr->col].status ==
1315 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1316 /* some other component has failed. Let's not make
1317 things worse. XXX wrong for RAID6 */
1318 RF_UNLOCK_MUTEX(raidPtr->mutex);
1319 return (EINVAL);
1320 }
1321 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1322 /* Can't fail a spared disk! */
1323 RF_UNLOCK_MUTEX(raidPtr->mutex);
1324 return (EINVAL);
1325 }
1326 RF_UNLOCK_MUTEX(raidPtr->mutex);
1327
1328 /* make a copy of the recon request so that we don't rely on
1329 * the user's buffer */
1330 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1331 if (rrcopy == NULL)
1332 return(ENOMEM);
1333 memcpy(rrcopy, rr, sizeof(*rr));
1334 rrcopy->raidPtr = (void *) raidPtr;
1335
1336 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1337 rf_ReconThread,
1338 rrcopy,"raid_recon");
1339 return (0);
1340
1341 /* invoke a copyback operation after recon on whatever disk
1342 * needs it, if any */
1343 case RAIDFRAME_COPYBACK:
1344
1345 if (raidPtr->Layout.map->faultsTolerated == 0) {
1346 /* This makes no sense on a RAID 0!! */
1347 return(EINVAL);
1348 }
1349
1350 if (raidPtr->copyback_in_progress == 1) {
1351 /* Copyback is already in progress! */
1352 return(EINVAL);
1353 }
1354
1355 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1356 rf_CopybackThread,
1357 raidPtr,"raid_copyback");
1358 return (retcode);
1359
1360 /* return the percentage completion of reconstruction */
1361 case RAIDFRAME_CHECK_RECON_STATUS:
1362 if (raidPtr->Layout.map->faultsTolerated == 0) {
1363 /* This makes no sense on a RAID 0, so tell the
1364 user it's done. */
1365 *(int *) data = 100;
1366 return(0);
1367 }
1368 if (raidPtr->status != rf_rs_reconstructing)
1369 *(int *) data = 100;
1370 else
1371 *(int *) data = raidPtr->reconControl->percentComplete;
1372 return (0);
1373 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1374 progressInfoPtr = (RF_ProgressInfo_t **) data;
1375 if (raidPtr->status != rf_rs_reconstructing) {
1376 progressInfo.remaining = 0;
1377 progressInfo.completed = 100;
1378 progressInfo.total = 100;
1379 } else {
1380 progressInfo.total =
1381 raidPtr->reconControl->numRUsTotal;
1382 progressInfo.completed =
1383 raidPtr->reconControl->numRUsComplete;
1384 progressInfo.remaining = progressInfo.total -
1385 progressInfo.completed;
1386 }
1387 retcode = copyout(&progressInfo, *progressInfoPtr,
1388 sizeof(RF_ProgressInfo_t));
1389 return (retcode);
1390
1391 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1392 if (raidPtr->Layout.map->faultsTolerated == 0) {
1393 /* This makes no sense on a RAID 0, so tell the
1394 user it's done. */
1395 *(int *) data = 100;
1396 return(0);
1397 }
1398 if (raidPtr->parity_rewrite_in_progress == 1) {
1399 *(int *) data = 100 *
1400 raidPtr->parity_rewrite_stripes_done /
1401 raidPtr->Layout.numStripe;
1402 } else {
1403 *(int *) data = 100;
1404 }
1405 return (0);
1406
1407 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1408 progressInfoPtr = (RF_ProgressInfo_t **) data;
1409 if (raidPtr->parity_rewrite_in_progress == 1) {
1410 progressInfo.total = raidPtr->Layout.numStripe;
1411 progressInfo.completed =
1412 raidPtr->parity_rewrite_stripes_done;
1413 progressInfo.remaining = progressInfo.total -
1414 progressInfo.completed;
1415 } else {
1416 progressInfo.remaining = 0;
1417 progressInfo.completed = 100;
1418 progressInfo.total = 100;
1419 }
1420 retcode = copyout(&progressInfo, *progressInfoPtr,
1421 sizeof(RF_ProgressInfo_t));
1422 return (retcode);
1423
1424 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1425 if (raidPtr->Layout.map->faultsTolerated == 0) {
1426 /* This makes no sense on a RAID 0 */
1427 *(int *) data = 100;
1428 return(0);
1429 }
1430 if (raidPtr->copyback_in_progress == 1) {
1431 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1432 raidPtr->Layout.numStripe;
1433 } else {
1434 *(int *) data = 100;
1435 }
1436 return (0);
1437
1438 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1439 progressInfoPtr = (RF_ProgressInfo_t **) data;
1440 if (raidPtr->copyback_in_progress == 1) {
1441 progressInfo.total = raidPtr->Layout.numStripe;
1442 progressInfo.completed =
1443 raidPtr->copyback_stripes_done;
1444 progressInfo.remaining = progressInfo.total -
1445 progressInfo.completed;
1446 } else {
1447 progressInfo.remaining = 0;
1448 progressInfo.completed = 100;
1449 progressInfo.total = 100;
1450 }
1451 retcode = copyout(&progressInfo, *progressInfoPtr,
1452 sizeof(RF_ProgressInfo_t));
1453 return (retcode);
1454
1455 /* the sparetable daemon calls this to wait for the kernel to
1456 * need a spare table. this ioctl does not return until a
1457 * spare table is needed. XXX -- calling mpsleep here in the
1458 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1459 * -- I should either compute the spare table in the kernel,
1460 * or have a different -- XXX XXX -- interface (a different
1461 * character device) for delivering the table -- XXX */
1462 #if 0
1463 case RAIDFRAME_SPARET_WAIT:
1464 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1465 while (!rf_sparet_wait_queue)
1466 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1467 waitreq = rf_sparet_wait_queue;
1468 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1469 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1470
1471 /* structure assignment */
1472 *((RF_SparetWait_t *) data) = *waitreq;
1473
1474 RF_Free(waitreq, sizeof(*waitreq));
1475 return (0);
1476
1477 /* wakes up a process waiting on SPARET_WAIT and puts an error
1478 * code in it that will cause the dameon to exit */
1479 case RAIDFRAME_ABORT_SPARET_WAIT:
1480 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1481 waitreq->fcol = -1;
1482 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1483 waitreq->next = rf_sparet_wait_queue;
1484 rf_sparet_wait_queue = waitreq;
1485 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1486 wakeup(&rf_sparet_wait_queue);
1487 return (0);
1488
1489 /* used by the spare table daemon to deliver a spare table
1490 * into the kernel */
1491 case RAIDFRAME_SEND_SPARET:
1492
1493 /* install the spare table */
1494 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1495
1496 /* respond to the requestor. the return status of the spare
1497 * table installation is passed in the "fcol" field */
1498 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1499 waitreq->fcol = retcode;
1500 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1501 waitreq->next = rf_sparet_resp_queue;
1502 rf_sparet_resp_queue = waitreq;
1503 wakeup(&rf_sparet_resp_queue);
1504 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1505
1506 return (retcode);
1507 #endif
1508
1509 default:
1510 break; /* fall through to the os-specific code below */
1511
1512 }
1513
1514 if (!raidPtr->valid)
1515 return (EINVAL);
1516
1517 /*
1518 * Add support for "regular" device ioctls here.
1519 */
1520
1521 switch (cmd) {
1522 case DIOCGDINFO:
1523 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1524 break;
1525 #ifdef __HAVE_OLD_DISKLABEL
1526 case ODIOCGDINFO:
1527 newlabel = *(rs->sc_dkdev.dk_label);
1528 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1529 return ENOTTY;
1530 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1531 break;
1532 #endif
1533
1534 case DIOCGPART:
1535 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1536 ((struct partinfo *) data)->part =
1537 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1538 break;
1539
1540 case DIOCWDINFO:
1541 case DIOCSDINFO:
1542 #ifdef __HAVE_OLD_DISKLABEL
1543 case ODIOCWDINFO:
1544 case ODIOCSDINFO:
1545 #endif
1546 {
1547 struct disklabel *lp;
1548 #ifdef __HAVE_OLD_DISKLABEL
1549 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1550 memset(&newlabel, 0, sizeof newlabel);
1551 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1552 lp = &newlabel;
1553 } else
1554 #endif
1555 lp = (struct disklabel *)data;
1556
1557 if ((error = raidlock(rs)) != 0)
1558 return (error);
1559
1560 rs->sc_flags |= RAIDF_LABELLING;
1561
1562 error = setdisklabel(rs->sc_dkdev.dk_label,
1563 lp, 0, rs->sc_dkdev.dk_cpulabel);
1564 if (error == 0) {
1565 if (cmd == DIOCWDINFO
1566 #ifdef __HAVE_OLD_DISKLABEL
1567 || cmd == ODIOCWDINFO
1568 #endif
1569 )
1570 error = writedisklabel(RAIDLABELDEV(dev),
1571 raidstrategy, rs->sc_dkdev.dk_label,
1572 rs->sc_dkdev.dk_cpulabel);
1573 }
1574 rs->sc_flags &= ~RAIDF_LABELLING;
1575
1576 raidunlock(rs);
1577
1578 if (error)
1579 return (error);
1580 break;
1581 }
1582
1583 case DIOCWLABEL:
1584 if (*(int *) data != 0)
1585 rs->sc_flags |= RAIDF_WLABEL;
1586 else
1587 rs->sc_flags &= ~RAIDF_WLABEL;
1588 break;
1589
1590 case DIOCGDEFLABEL:
1591 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1592 break;
1593
1594 #ifdef __HAVE_OLD_DISKLABEL
1595 case ODIOCGDEFLABEL:
1596 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1597 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1598 return ENOTTY;
1599 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1600 break;
1601 #endif
1602
1603 default:
1604 retcode = ENOTTY;
1605 }
1606 return (retcode);
1607
1608 }
1609
1610
1611 /* raidinit -- complete the rest of the initialization for the
1612 RAIDframe device. */
1613
1614
1615 static void
1616 raidinit(raidPtr)
1617 RF_Raid_t *raidPtr;
1618 {
1619 struct raid_softc *rs;
1620 int unit;
1621
1622 unit = raidPtr->raidid;
1623
1624 rs = &raid_softc[unit];
1625
1626 /* XXX should check return code first... */
1627 rs->sc_flags |= RAIDF_INITED;
1628
1629 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1630
1631 rs->sc_dkdev.dk_name = rs->sc_xname;
1632
1633 /* disk_attach actually creates space for the CPU disklabel, among
1634 * other things, so it's critical to call this *BEFORE* we try putzing
1635 * with disklabels. */
1636
1637 disk_attach(&rs->sc_dkdev);
1638
1639 /* XXX There may be a weird interaction here between this, and
1640 * protectedSectors, as used in RAIDframe. */
1641
1642 rs->sc_size = raidPtr->totalSectors;
1643
1644 }
1645 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1646 /* wake up the daemon & tell it to get us a spare table
1647 * XXX
1648 * the entries in the queues should be tagged with the raidPtr
1649 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1651 * XXX
1652 *
1653 * XXX This code is not currently used. GO
1654 */
/*
 * Hand a spare-table request to the user-land daemon and block until it
 * delivers a response.
 *
 * req is linked onto rf_sparet_wait_queue and the daemon (sleeping in
 * the RAIDFRAME_SPARET_WAIT ioctl) is woken.  We then sleep until a
 * response appears on rf_sparet_resp_queue, dequeue the first entry,
 * and return its fcol field, which carries the installation status set
 * by the RAIDFRAME_SEND_SPARET path.
 *
 * Returns the daemon's status code (0 on success).
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): the old "mpsleep unlocks the mutex" comment was a
	 * leftover from the OSF/1 version; tsleep() does NOT release
	 * rf_sparet_wait_mutex, so the lock is nominally held across the
	 * sleep.  Harmless where RF_LOCK_MUTEX compiles to a simple_lock
	 * no-op, but worth confirming on LOCKDEBUG/MP kernels. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* dequeue the response; note req now points at the *response*
	 * structure, not the request we were passed */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1680 #endif
1681
1682 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1683 * bp & passes it down.
1684 * any calls originating in the kernel must use non-blocking I/O
1685 * do some extra sanity checking to return "appropriate" error values for
1686 * certain conditions (to make some standard utilities work)
1687 *
1688 * Formerly known as: rf_DoAccessKernel
1689 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex across the label update -- the call does
		 * its own locking and component I/O -- then re-take it */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* invariant: raidPtr->mutex is held at the top of each iteration */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* the (sum < x) comparisons catch arithmetic wraparound as
		 * well as requests running off the end of the array */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop test */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* transfers must be a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this access */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
					  RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
					  do_async, raid_addr, num_blocks,
					  bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (bp->b_error) {
			bp->b_flags |= B_ERROR;
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	/* fell out of the loop: no openings left; release the mutex */
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1809
1810
1811
1812
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry.
 *
 * Allocates a shadow buf (struct raidbuf) from raidframe_cbufpool, aims
 * it at the component device named by the queue, and pushes it down via
 * VOP_STRATEGY().  Completion is delivered through KernelWakeupFunc(),
 * which also frees the shadow buf.  Returns 0, or ENOMEM if no shadow
 * buf could be allocated (the original buf is marked B_ERROR in that
 * case).
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	/* remember which queue this request belongs to; KernelWakeupFunc()
	 * needs it for per-component accounting */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* shadow buf that actually travels to the component; returned to
	 * the pool in KernelWakeupFunc() */
	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
	if (raidbp == NULL) {
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no component I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* point the shadow buf at the component; KernelWakeupFunc()
		 * fires as its b_iodone on completion */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump v_numoutput for vnode write accounting */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with an I/O invoked from
 * kernel code.  It runs at biodone() time as the b_iodone handler of the
 * shadow buf built in rf_DispatchKernelIO(): it propagates error/resid
 * state back to the original buf, marks the component failed on a first
 * I/O error, frees the shadow buf, and hands the request to the raidio
 * thread via the raidPtr->iodone queue.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int     s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buf this component I/O was issued on behalf of */
	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	if (raidbp->rf_buf.b_flags & B_ERROR) {
		/* propagate the component error to the original buf,
		 * defaulting to EIO if no errno was recorded */
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* shadow buf is finished with; return it to the pool */
	pool_put(&raidframe_cbufpool, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2002
2003
2004
2005 /*
2006 * initialize a buf structure for doing an I/O in the kernel.
2007 */
2008 static void
2009 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
2010 logBytesPerSector, b_proc)
2011 struct buf *bp;
2012 struct vnode *b_vp;
2013 unsigned rw_flag;
2014 dev_t dev;
2015 RF_SectorNum_t startSect;
2016 RF_SectorCount_t numSect;
2017 caddr_t buf;
2018 void (*cbFunc) (struct buf *);
2019 void *cbArg;
2020 int logBytesPerSector;
2021 struct proc *b_proc;
2022 {
2023 /* bp->b_flags = B_PHYS | rw_flag; */
2024 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2025 bp->b_bcount = numSect << logBytesPerSector;
2026 bp->b_bufsize = bp->b_bcount;
2027 bp->b_error = 0;
2028 bp->b_dev = dev;
2029 bp->b_data = buf;
2030 bp->b_blkno = startSect;
2031 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2032 if (bp->b_bcount == 0) {
2033 panic("bp->b_bcount is zero in InitBP!!");
2034 }
2035 bp->b_proc = b_proc;
2036 bp->b_iodone = cbFunc;
2037 bp->b_vp = b_vp;
2038
2039 }
2040
2041 static void
2042 raidgetdefaultlabel(raidPtr, rs, lp)
2043 RF_Raid_t *raidPtr;
2044 struct raid_softc *rs;
2045 struct disklabel *lp;
2046 {
2047 memset(lp, 0, sizeof(*lp));
2048
2049 /* fabricate a label... */
2050 lp->d_secperunit = raidPtr->totalSectors;
2051 lp->d_secsize = raidPtr->bytesPerSector;
2052 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2053 lp->d_ntracks = 4 * raidPtr->numCol;
2054 lp->d_ncylinders = raidPtr->totalSectors /
2055 (lp->d_nsectors * lp->d_ntracks);
2056 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2057
2058 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2059 lp->d_type = DTYPE_RAID;
2060 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2061 lp->d_rpm = 3600;
2062 lp->d_interleave = 1;
2063 lp->d_flags = 0;
2064
2065 lp->d_partitions[RAW_PART].p_offset = 0;
2066 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2067 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2068 lp->d_npartitions = RAW_PART + 1;
2069
2070 lp->d_magic = DISKMAGIC;
2071 lp->d_magic2 = DISKMAGIC;
2072 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2073
2074 }
2075 /*
2076 * Read the disklabel from the raid device. If one is not present, fake one
2077 * up.
2078 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	/* Start from a zeroed machine-dependent label... */
	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* ...and a freshly faked-up default label for this unit. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.  On failure
	 * (no label on disk) fall back to the in-core default label.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn (but do not reject) partitions extending past the
		   end of the raid device. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ld)\n",
				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2132 /*
2133 * Take care of things one might want to take care of in the event
2134 * that a disklabel isn't present.
2135 */
static void
raidmakedisklabel(rs)
	struct raid_softc *rs;
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	/* Recompute the checksum so the faked-up label validates. */
	lp->d_checksum = dkcksum(lp);
}
2154 /*
2155 * Lookup the provided name in the filesystem. If the file exists,
2156 * is a valid block device, and isn't being used by anyone else,
2157 * set *vpp to the file's vnode.
2158 * You'll find the original of this in ccd.c
2159 */
2160 int
2161 raidlookup(path, p, vpp)
2162 char *path;
2163 struct proc *p;
2164 struct vnode **vpp; /* result */
2165 {
2166 struct nameidata nd;
2167 struct vnode *vp;
2168 struct vattr va;
2169 int error;
2170
2171 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2172 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2173 return (error);
2174 }
2175 vp = nd.ni_vp;
2176 if (vp->v_usecount > 1) {
2177 VOP_UNLOCK(vp, 0);
2178 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2179 return (EBUSY);
2180 }
2181 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2182 VOP_UNLOCK(vp, 0);
2183 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2184 return (error);
2185 }
2186 /* XXX: eventually we should handle VREG, too. */
2187 if (va.va_type != VBLK) {
2188 VOP_UNLOCK(vp, 0);
2189 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2190 return (ENOTBLK);
2191 }
2192 VOP_UNLOCK(vp, 0);
2193 *vpp = vp;
2194 return (0);
2195 }
2196 /*
2197 * Wait interruptibly for an exclusive lock.
2198 *
2199 * XXX
2200 * Several drivers do this; it should be abstracted and made MP-safe.
2201 * (Hmm... where have we seen this warning before :-> GO )
2202 */
static int
raidlock(rs)
	struct raid_softc *rs;
{
	int error;

	/* Sleep until the current holder drops RAIDF_LOCKED; RAIDF_WANTED
	   tells raidunlock() that somebody needs a wakeup.  PCATCH lets a
	   signal interrupt the wait, in which case we bail with the error
	   from tsleep() without taking the lock. */
	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
2218 /*
2219 * Unlock and wake up any waiters.
2220 */
2221 static void
2222 raidunlock(rs)
2223 struct raid_softc *rs;
2224 {
2225
2226 rs->sc_flags &= ~RAIDF_LOCKED;
2227 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2228 rs->sc_flags &= ~RAIDF_WANTED;
2229 wakeup(rs);
2230 }
2231 }
2232
2233
2234 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2235 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2236
2237 int
2238 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2239 {
2240 RF_ComponentLabel_t clabel;
2241 raidread_component_label(dev, b_vp, &clabel);
2242 clabel.mod_counter = mod_counter;
2243 clabel.clean = RF_RAID_CLEAN;
2244 raidwrite_component_label(dev, b_vp, &clabel);
2245 return(0);
2246 }
2247
2248
2249 int
2250 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2251 {
2252 RF_ComponentLabel_t clabel;
2253 raidread_component_label(dev, b_vp, &clabel);
2254 clabel.mod_counter = mod_counter;
2255 clabel.clean = RF_RAID_DIRTY;
2256 raidwrite_component_label(dev, b_vp, &clabel);
2257 return(0);
2258 }
2259
2260 /* ARGSUSED */
2261 int
2262 raidread_component_label(dev, b_vp, clabel)
2263 dev_t dev;
2264 struct vnode *b_vp;
2265 RF_ComponentLabel_t *clabel;
2266 {
2267 struct buf *bp;
2268 const struct bdevsw *bdev;
2269 int error;
2270
2271 /* XXX should probably ensure that we don't try to do this if
2272 someone has changed rf_protected_sectors. */
2273
2274 if (b_vp == NULL) {
2275 /* For whatever reason, this component is not valid.
2276 Don't try to read a component label from it. */
2277 return(EINVAL);
2278 }
2279
2280 /* get a block of the appropriate size... */
2281 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2282 bp->b_dev = dev;
2283
2284 /* get our ducks in a row for the read */
2285 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2286 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2287 bp->b_flags |= B_READ;
2288 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2289
2290 bdev = bdevsw_lookup(bp->b_dev);
2291 if (bdev == NULL)
2292 return (ENXIO);
2293 (*bdev->d_strategy)(bp);
2294
2295 error = biowait(bp);
2296
2297 if (!error) {
2298 memcpy(clabel, bp->b_data,
2299 sizeof(RF_ComponentLabel_t));
2300 }
2301
2302 brelse(bp);
2303 return(error);
2304 }
2305 /* ARGSUSED */
2306 int
2307 raidwrite_component_label(dev, b_vp, clabel)
2308 dev_t dev;
2309 struct vnode *b_vp;
2310 RF_ComponentLabel_t *clabel;
2311 {
2312 struct buf *bp;
2313 const struct bdevsw *bdev;
2314 int error;
2315
2316 /* get a block of the appropriate size... */
2317 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2318 bp->b_dev = dev;
2319
2320 /* get our ducks in a row for the write */
2321 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2322 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2323 bp->b_flags |= B_WRITE;
2324 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2325
2326 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2327
2328 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2329
2330 bdev = bdevsw_lookup(bp->b_dev);
2331 if (bdev == NULL)
2332 return (ENXIO);
2333 (*bdev->d_strategy)(bp);
2334 error = biowait(bp);
2335 brelse(bp);
2336 if (error) {
2337 #if 1
2338 printf("Failed to write RAID component info!\n");
2339 #endif
2340 }
2341
2342 return(error);
2343 }
2344
/*
 * Bump the array's modification counter and mark every live component
 * (and every in-service spare) dirty, so an unclean shutdown can be
 * detected at the next configuration.
 */
void
rf_markalldirty(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column claims this spare,
			   scol keeps its previous value (initially -1) and
			   that is what gets written below -- confirm this
			   cannot happen for an rf_ds_used_spare disk. */

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2416
2417
/*
 * Rewrite the component labels of every optimal component and every
 * in-service spare with a bumped mod_counter and rf_ds_optimal status.
 * If `final' is RF_FINAL_COMPONENT_UPDATE and parity is known clean,
 * also mark each such component clean (shutdown path).
 */
void
rf_update_component_labels(raidPtr, final)
	RF_Raid_t *raidPtr;
	int final;
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			/* The spare now presents itself as the column it
			   replaced, and as fully reconstructed. */
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2510
2511 void
2512 rf_close_component(raidPtr, vp, auto_configured)
2513 RF_Raid_t *raidPtr;
2514 struct vnode *vp;
2515 int auto_configured;
2516 {
2517 struct proc *p;
2518
2519 p = raidPtr->engine_thread;
2520
2521 if (vp != NULL) {
2522 if (auto_configured == 1) {
2523 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2524 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2525 vput(vp);
2526
2527 } else {
2528 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2529 }
2530 }
2531 }
2532
2533
2534 void
2535 rf_UnconfigureVnodes(raidPtr)
2536 RF_Raid_t *raidPtr;
2537 {
2538 int r,c;
2539 struct vnode *vp;
2540 int acd;
2541
2542
2543 /* We take this opportunity to close the vnodes like we should.. */
2544
2545 for (c = 0; c < raidPtr->numCol; c++) {
2546 vp = raidPtr->raid_cinfo[c].ci_vp;
2547 acd = raidPtr->Disks[c].auto_configured;
2548 rf_close_component(raidPtr, vp, acd);
2549 raidPtr->raid_cinfo[c].ci_vp = NULL;
2550 raidPtr->Disks[c].auto_configured = 0;
2551 }
2552
2553 for (r = 0; r < raidPtr->numSpare; r++) {
2554 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2555 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2556 rf_close_component(raidPtr, vp, acd);
2557 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2558 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2559 }
2560 }
2561
2562
/*
 * Kernel-thread body: fail the component named in `req' (optionally
 * kicking off reconstruction to a spare, per RF_FDFLAGS_RECON), free
 * the request, and exit the thread.  Never returns.
 */
void
rf_ReconThread(req)
	struct rf_recon_req *req;
{
	int s;
	RF_Raid_t *raidPtr;

	/* recon_in_progress is set for the duration so shutdown and
	   ioctl paths can tell a reconstruction is running. */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* XXX get rid of this! we don't need it at all.. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2586
/*
 * Kernel-thread body: rewrite all parity for the array.  On success the
 * array is marked parity-clean; a failure is only logged.  Wakes anyone
 * blocked in shutdown waiting on parity_rewrite_in_progress, then exits
 * the thread.  Never returns.
 */
void
rf_RewriteParityThread(raidPtr)
	RF_Raid_t *raidPtr;
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2616
2617
/*
 * Kernel-thread body: copy reconstructed data from the spare back to
 * the replaced component, with copyback_in_progress set for the
 * duration, then exit the thread.  Never returns.
 */
void
rf_CopybackThread(raidPtr)
	RF_Raid_t *raidPtr;
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2633
2634
/*
 * Kernel-thread body: reconstruct the component named in `req' in
 * place (back onto the same disk), free the request, and exit the
 * thread.  Never returns.
 */
void
rf_ReconstructInPlaceThread(req)
	struct rf_recon_req *req;
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2653
2654 RF_AutoConfig_t *
2655 rf_find_raid_components()
2656 {
2657 struct vnode *vp;
2658 struct disklabel label;
2659 struct device *dv;
2660 dev_t dev;
2661 int bmajor;
2662 int error;
2663 int i;
2664 int good_one;
2665 RF_ComponentLabel_t *clabel;
2666 RF_AutoConfig_t *ac_list;
2667 RF_AutoConfig_t *ac;
2668
2669
2670 /* initialize the AutoConfig list */
2671 ac_list = NULL;
2672
2673 /* we begin by trolling through *all* the devices on the system */
2674
2675 for (dv = alldevs.tqh_first; dv != NULL;
2676 dv = dv->dv_list.tqe_next) {
2677
2678 /* we are only interested in disks... */
2679 if (dv->dv_class != DV_DISK)
2680 continue;
2681
2682 /* we don't care about floppies... */
2683 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2684 continue;
2685 }
2686
2687 /* we don't care about CD's... */
2688 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2689 continue;
2690 }
2691
2692 /* hdfd is the Atari/Hades floppy driver */
2693 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2694 continue;
2695 }
2696 /* fdisa is the Atari/Milan floppy driver */
2697 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2698 continue;
2699 }
2700
2701 /* need to find the device_name_to_block_device_major stuff */
2702 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2703
2704 /* get a vnode for the raw partition of this disk */
2705
2706 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2707 if (bdevvp(dev, &vp))
2708 panic("RAID can't alloc vnode");
2709
2710 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2711
2712 if (error) {
2713 /* "Who cares." Continue looking
2714 for something that exists*/
2715 vput(vp);
2716 continue;
2717 }
2718
2719 /* Ok, the disk exists. Go get the disklabel. */
2720 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2721 if (error) {
2722 /*
2723 * XXX can't happen - open() would
2724 * have errored out (or faked up one)
2725 */
2726 printf("can't get label for dev %s%c (%d)!?!?\n",
2727 dv->dv_xname, 'a' + RAW_PART, error);
2728 }
2729
2730 /* don't need this any more. We'll allocate it again
2731 a little later if we really do... */
2732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2733 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2734 vput(vp);
2735
2736 for (i=0; i < label.d_npartitions; i++) {
2737 /* We only support partitions marked as RAID */
2738 if (label.d_partitions[i].p_fstype != FS_RAID)
2739 continue;
2740
2741 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2742 if (bdevvp(dev, &vp))
2743 panic("RAID can't alloc vnode");
2744
2745 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2746 if (error) {
2747 /* Whatever... */
2748 vput(vp);
2749 continue;
2750 }
2751
2752 good_one = 0;
2753
2754 clabel = (RF_ComponentLabel_t *)
2755 malloc(sizeof(RF_ComponentLabel_t),
2756 M_RAIDFRAME, M_NOWAIT);
2757 if (clabel == NULL) {
2758 /* XXX CLEANUP HERE */
2759 printf("RAID auto config: out of memory!\n");
2760 return(NULL); /* XXX probably should panic? */
2761 }
2762
2763 if (!raidread_component_label(dev, vp, clabel)) {
2764 /* Got the label. Does it look reasonable? */
2765 if (rf_reasonable_label(clabel) &&
2766 (clabel->partitionSize <=
2767 label.d_partitions[i].p_size)) {
2768 #if DEBUG
2769 printf("Component on: %s%c: %d\n",
2770 dv->dv_xname, 'a'+i,
2771 label.d_partitions[i].p_size);
2772 rf_print_component_label(clabel);
2773 #endif
2774 /* if it's reasonable, add it,
2775 else ignore it. */
2776 ac = (RF_AutoConfig_t *)
2777 malloc(sizeof(RF_AutoConfig_t),
2778 M_RAIDFRAME,
2779 M_NOWAIT);
2780 if (ac == NULL) {
2781 /* XXX should panic?? */
2782 return(NULL);
2783 }
2784
2785 sprintf(ac->devname, "%s%c",
2786 dv->dv_xname, 'a'+i);
2787 ac->dev = dev;
2788 ac->vp = vp;
2789 ac->clabel = clabel;
2790 ac->next = ac_list;
2791 ac_list = ac;
2792 good_one = 1;
2793 }
2794 }
2795 if (!good_one) {
2796 /* cleanup */
2797 free(clabel, M_RAIDFRAME);
2798 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2799 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2800 vput(vp);
2801 }
2802 }
2803 }
2804 return(ac_list);
2805 }
2806
2807 static int
2808 rf_reasonable_label(clabel)
2809 RF_ComponentLabel_t *clabel;
2810 {
2811
2812 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2813 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2814 ((clabel->clean == RF_RAID_CLEAN) ||
2815 (clabel->clean == RF_RAID_DIRTY)) &&
2816 clabel->row >=0 &&
2817 clabel->column >= 0 &&
2818 clabel->num_rows > 0 &&
2819 clabel->num_columns > 0 &&
2820 clabel->row < clabel->num_rows &&
2821 clabel->column < clabel->num_columns &&
2822 clabel->blockSize > 0 &&
2823 clabel->numBlocks > 0) {
2824 /* label looks reasonable enough... */
2825 return(1);
2826 }
2827 return(0);
2828 }
2829
2830
#if DEBUG
/*
 * Debug aid: dump every field of a component label to the console.
 * Compiled only when DEBUG is set.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2859
2860 RF_ConfigSet_t *
2861 rf_create_auto_sets(ac_list)
2862 RF_AutoConfig_t *ac_list;
2863 {
2864 RF_AutoConfig_t *ac;
2865 RF_ConfigSet_t *config_sets;
2866 RF_ConfigSet_t *cset;
2867 RF_AutoConfig_t *ac_next;
2868
2869
2870 config_sets = NULL;
2871
2872 /* Go through the AutoConfig list, and figure out which components
2873 belong to what sets. */
2874 ac = ac_list;
2875 while(ac!=NULL) {
2876 /* we're going to putz with ac->next, so save it here
2877 for use at the end of the loop */
2878 ac_next = ac->next;
2879
2880 if (config_sets == NULL) {
2881 /* will need at least this one... */
2882 config_sets = (RF_ConfigSet_t *)
2883 malloc(sizeof(RF_ConfigSet_t),
2884 M_RAIDFRAME, M_NOWAIT);
2885 if (config_sets == NULL) {
2886 panic("rf_create_auto_sets: No memory!");
2887 }
2888 /* this one is easy :) */
2889 config_sets->ac = ac;
2890 config_sets->next = NULL;
2891 config_sets->rootable = 0;
2892 ac->next = NULL;
2893 } else {
2894 /* which set does this component fit into? */
2895 cset = config_sets;
2896 while(cset!=NULL) {
2897 if (rf_does_it_fit(cset, ac)) {
2898 /* looks like it matches... */
2899 ac->next = cset->ac;
2900 cset->ac = ac;
2901 break;
2902 }
2903 cset = cset->next;
2904 }
2905 if (cset==NULL) {
2906 /* didn't find a match above... new set..*/
2907 cset = (RF_ConfigSet_t *)
2908 malloc(sizeof(RF_ConfigSet_t),
2909 M_RAIDFRAME, M_NOWAIT);
2910 if (cset == NULL) {
2911 panic("rf_create_auto_sets: No memory!");
2912 }
2913 cset->ac = ac;
2914 ac->next = NULL;
2915 cset->next = config_sets;
2916 cset->rootable = 0;
2917 config_sets = cset;
2918 }
2919 }
2920 ac = ac_next;
2921 }
2922
2923
2924 return(config_sets);
2925 }
2926
2927 static int
2928 rf_does_it_fit(cset, ac)
2929 RF_ConfigSet_t *cset;
2930 RF_AutoConfig_t *ac;
2931 {
2932 RF_ComponentLabel_t *clabel1, *clabel2;
2933
2934 /* If this one matches the *first* one in the set, that's good
2935 enough, since the other members of the set would have been
2936 through here too... */
2937 /* note that we are not checking partitionSize here..
2938
2939 Note that we are also not checking the mod_counters here.
2940 If everything else matches execpt the mod_counter, that's
2941 good enough for this test. We will deal with the mod_counters
2942 a little later in the autoconfiguration process.
2943
2944 (clabel1->mod_counter == clabel2->mod_counter) &&
2945
2946 The reason we don't check for this is that failed disks
2947 will have lower modification counts. If those disks are
2948 not added to the set they used to belong to, then they will
2949 form their own set, which may result in 2 different sets,
2950 for example, competing to be configured at raid0, and
2951 perhaps competing to be the root filesystem set. If the
2952 wrong ones get configured, or both attempt to become /,
2953 weird behaviour and or serious lossage will occur. Thus we
2954 need to bring them into the fold here, and kick them out at
2955 a later point.
2956
2957 */
2958
2959 clabel1 = cset->ac->clabel;
2960 clabel2 = ac->clabel;
2961 if ((clabel1->version == clabel2->version) &&
2962 (clabel1->serial_number == clabel2->serial_number) &&
2963 (clabel1->num_rows == clabel2->num_rows) &&
2964 (clabel1->num_columns == clabel2->num_columns) &&
2965 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2966 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2967 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2968 (clabel1->parityConfig == clabel2->parityConfig) &&
2969 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2970 (clabel1->blockSize == clabel2->blockSize) &&
2971 (clabel1->numBlocks == clabel2->numBlocks) &&
2972 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2973 (clabel1->root_partition == clabel2->root_partition) &&
2974 (clabel1->last_unit == clabel2->last_unit) &&
2975 (clabel1->config_order == clabel2->config_order)) {
2976 /* if it get's here, it almost *has* to be a match */
2977 } else {
2978 /* it's not consistent with somebody in the set..
2979 punt */
2980 return(0);
2981 }
2982 /* all was fine.. it must fit... */
2983 return(1);
2984 }
2985
/*
 * Decide whether a configuration set has enough live components (at
 * the set's highest mod_counter) to be configured.  Returns 1 if so,
 * 0 if too many components are missing for the set's RAID level.
 */
int
rf_have_enough_components(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   We take the maximum over all members; stale (failed) components
	   carry lower counters and are treated as missing below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (mod_counter matches) component
		   claiming column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an odd (second-of-pair) component,
			   and we didn't bail.. reset the
			   even_pair_failed flag, and go on to the next
			   component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3089
/*
 * Build an RF_Config_t from the component labels of an autoconfig
 * list, suitable for passing to rf_Configure().  `ac' is the first
 * component of the set; the loop below visits the whole list to fill
 * in the per-column device names.
 */
void
rf_create_configuration(ac,config,raidPtr)
	RF_AutoConfig_t *ac;
	RF_Config_t *config;
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	/* NOTE(review): this also writes clabel->num_rows = 1 as a side
	   effect -- the label of the first component is modified here. */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* No debug variables are carried over into an autoconfigured set. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3126
3127 int
3128 rf_set_autoconfig(raidPtr, new_value)
3129 RF_Raid_t *raidPtr;
3130 int new_value;
3131 {
3132 RF_ComponentLabel_t clabel;
3133 struct vnode *vp;
3134 dev_t dev;
3135 int column;
3136 int sparecol;
3137
3138 raidPtr->autoconfigure = new_value;
3139
3140 for(column=0; column<raidPtr->numCol; column++) {
3141 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3142 dev = raidPtr->Disks[column].dev;
3143 vp = raidPtr->raid_cinfo[column].ci_vp;
3144 raidread_component_label(dev, vp, &clabel);
3145 clabel.autoconfigure = new_value;
3146 raidwrite_component_label(dev, vp, &clabel);
3147 }
3148 }
3149 for(column = 0; column < raidPtr->numSpare ; column++) {
3150 sparecol = raidPtr->numCol + column;
3151 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3152 dev = raidPtr->Disks[sparecol].dev;
3153 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3154 raidread_component_label(dev, vp, &clabel);
3155 clabel.autoconfigure = new_value;
3156 raidwrite_component_label(dev, vp, &clabel);
3157 }
3158 }
3159 return(new_value);
3160 }
3161
3162 int
3163 rf_set_rootpartition(raidPtr, new_value)
3164 RF_Raid_t *raidPtr;
3165 int new_value;
3166 {
3167 RF_ComponentLabel_t clabel;
3168 struct vnode *vp;
3169 dev_t dev;
3170 int column;
3171 int sparecol;
3172
3173 raidPtr->root_partition = new_value;
3174 for(column=0; column<raidPtr->numCol; column++) {
3175 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3176 dev = raidPtr->Disks[column].dev;
3177 vp = raidPtr->raid_cinfo[column].ci_vp;
3178 raidread_component_label(dev, vp, &clabel);
3179 clabel.root_partition = new_value;
3180 raidwrite_component_label(dev, vp, &clabel);
3181 }
3182 }
3183 for(column = 0; column < raidPtr->numSpare ; column++) {
3184 sparecol = raidPtr->numCol + column;
3185 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3186 dev = raidPtr->Disks[sparecol].dev;
3187 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3188 raidread_component_label(dev, vp, &clabel);
3189 clabel.root_partition = new_value;
3190 raidwrite_component_label(dev, vp, &clabel);
3191 }
3192 }
3193 return(new_value);
3194 }
3195
3196 void
3197 rf_release_all_vps(cset)
3198 RF_ConfigSet_t *cset;
3199 {
3200 RF_AutoConfig_t *ac;
3201
3202 ac = cset->ac;
3203 while(ac!=NULL) {
3204 /* Close the vp, and give it back */
3205 if (ac->vp) {
3206 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3207 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3208 vput(ac->vp);
3209 ac->vp = NULL;
3210 }
3211 ac = ac->next;
3212 }
3213 }
3214
3215
3216 void
3217 rf_cleanup_config_set(cset)
3218 RF_ConfigSet_t *cset;
3219 {
3220 RF_AutoConfig_t *ac;
3221 RF_AutoConfig_t *next_ac;
3222
3223 ac = cset->ac;
3224 while(ac!=NULL) {
3225 next_ac = ac->next;
3226 /* nuke the label */
3227 free(ac->clabel, M_RAIDFRAME);
3228 /* cleanup the config structure */
3229 free(ac, M_RAIDFRAME);
3230 /* "next.." */
3231 ac = next_ac;
3232 }
3233 /* and, finally, nuke the config set */
3234 free(cset, M_RAIDFRAME);
3235 }
3236
3237
/*
 * Fill in *clabel from the current state of the array: geometry,
 * serial/mod counters, layout parameters, and configuration flags.
 * Callers adjust per-component fields (row/column/status) afterwards.
 */
void
raid_init_component_label(raidPtr, clabel)
	RF_Raid_t *raidPtr;
	RF_ComponentLabel_t *clabel;
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3267
3268 int
3269 rf_auto_config_set(cset,unit)
3270 RF_ConfigSet_t *cset;
3271 int *unit;
3272 {
3273 RF_Raid_t *raidPtr;
3274 RF_Config_t *config;
3275 int raidID;
3276 int retcode;
3277
3278 #if DEBUG
3279 printf("RAID autoconfigure\n");
3280 #endif
3281
3282 retcode = 0;
3283 *unit = -1;
3284
3285 /* 1. Create a config structure */
3286
3287 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3288 M_RAIDFRAME,
3289 M_NOWAIT);
3290 if (config==NULL) {
3291 printf("Out of mem!?!?\n");
3292 /* XXX do something more intelligent here. */
3293 return(1);
3294 }
3295
3296 memset(config, 0, sizeof(RF_Config_t));
3297
3298 /*
3299 2. Figure out what RAID ID this one is supposed to live at
3300 See if we can get the same RAID dev that it was configured
3301 on last time..
3302 */
3303
3304 raidID = cset->ac->clabel->last_unit;
3305 if ((raidID < 0) || (raidID >= numraid)) {
3306 /* let's not wander off into lala land. */
3307 raidID = numraid - 1;
3308 }
3309 if (raidPtrs[raidID]->valid != 0) {
3310
3311 /*
3312 Nope... Go looking for an alternative...
3313 Start high so we don't immediately use raid0 if that's
3314 not taken.
3315 */
3316
3317 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3318 if (raidPtrs[raidID]->valid == 0) {
3319 /* can use this one! */
3320 break;
3321 }
3322 }
3323 }
3324
3325 if (raidID < 0) {
3326 /* punt... */
3327 printf("Unable to auto configure this set!\n");
3328 printf("(Out of RAID devs!)\n");
3329 return(1);
3330 }
3331
3332 #if DEBUG
3333 printf("Configuring raid%d:\n",raidID);
3334 #endif
3335
3336 raidPtr = raidPtrs[raidID];
3337
3338 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3339 raidPtr->raidid = raidID;
3340 raidPtr->openings = RAIDOUTSTANDING;
3341
3342 /* 3. Build the configuration structure */
3343 rf_create_configuration(cset->ac, config, raidPtr);
3344
3345 /* 4. Do the configuration */
3346 retcode = rf_Configure(raidPtr, config, cset->ac);
3347
3348 if (retcode == 0) {
3349
3350 raidinit(raidPtrs[raidID]);
3351
3352 rf_markalldirty(raidPtrs[raidID]);
3353 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3354 if (cset->ac->clabel->root_partition==1) {
3355 /* everything configured just fine. Make a note
3356 that this set is eligible to be root. */
3357 cset->rootable = 1;
3358 /* XXX do this here? */
3359 raidPtrs[raidID]->root_partition = 1;
3360 }
3361 }
3362
3363 /* 5. Cleanup */
3364 free(config, M_RAIDFRAME);
3365
3366 *unit = raidID;
3367 return(retcode);
3368 }
3369
3370 void
3371 rf_disk_unbusy(desc)
3372 RF_RaidAccessDesc_t *desc;
3373 {
3374 struct buf *bp;
3375
3376 bp = (struct buf *)desc->bp;
3377 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3378 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3379 }
3380