rf_netbsdkintf.c revision 1.169 1 /* $NetBSD: rf_netbsdkintf.c,v 1.169 2003/12/30 21:59:03 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.169 2003/12/30 21:59:03 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/user.h>
169 #include <sys/reboot.h>
170
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include "raid.h"
174 #include "opt_raid_autoconfig.h"
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188
189 #ifdef DEBUG
190 int rf_kdebug_level = 0;
191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
192 #else /* DEBUG */
193 #define db1_printf(a) { }
194 #endif /* DEBUG */
195
196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
197
198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
199
200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
201 * spare table */
202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
203 * installation process */
204
205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
206
207 /* prototypes */
208 static void KernelWakeupFunc(struct buf * bp);
209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
210 dev_t dev, RF_SectorNum_t startSect,
211 RF_SectorCount_t numSect, caddr_t buf,
212 void (*cbFunc) (struct buf *), void *cbArg,
213 int logBytesPerSector, struct proc * b_proc);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
/*
 * Pilfered from ccd.c
 */

/*
 * Per-component I/O descriptor: wraps the new buf that is handed to a
 * component disk and remembers the original buf and the RAIDframe
 * request it belongs to.  Allocated from raidframe_cbufpool.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* component buffer pool (initialized in raidattach()) */
struct pool raidframe_cbufpool;
249
250 /* XXX Not sure if the following should be replacing the raidPtrs above,
251 or if it should be used in conjunction with that...
252 */
253
/*
 * Per-unit software state, parallel to raidPtrs[].  One entry per
 * "raid%d" pseudo-device; the array is allocated and zeroed in
 * raidattach().
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 int numraid = 0;
271
272 /*
273 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
274 * Be aware that large numbers can allow the driver to consume a lot of
275 * kernel memory, especially on writes, and in degraded mode reads.
276 *
277 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
278 * a single 64K write will typically require 64K for the old data,
279 * 64K for the old parity, and 64K for the new parity, for a total
280 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
282 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
283 *
284 * Now in degraded mode, for example, a 64K read on the above setup may
285 * require data reconstruction, which will require *all* of the 4 remaining
286 * disks to participate -- 4 * 32K/disk == 128K again.
287 */
288
289 #ifndef RAIDOUTSTANDING
290 #define RAIDOUTSTANDING 6
291 #endif
292
293 #define RAIDLABELDEV(dev) \
294 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
295
296 /* declared here, and made public, for the benefit of KVM stuff.. */
297 struct raid_softc *raid_softc;
298
299 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
300 struct disklabel *);
301 static void raidgetdisklabel(dev_t);
302 static void raidmakedisklabel(struct raid_softc *);
303
304 static int raidlock(struct raid_softc *);
305 static void raidunlock(struct raid_softc *);
306
307 static void rf_markalldirty(RF_Raid_t *);
308
309 struct device *raidrootdev;
310
311 void rf_ReconThread(struct rf_recon_req *);
312 /* XXX what I want is: */
313 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
314 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
315 void rf_CopybackThread(RF_Raid_t *raidPtr);
316 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
317 int rf_autoconfig(struct device *self);
318 void rf_buildroothack(RF_ConfigSet_t *);
319
320 RF_AutoConfig_t *rf_find_raid_components(void);
321 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
322 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
323 static int rf_reasonable_label(RF_ComponentLabel_t *);
324 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
325 int rf_set_autoconfig(RF_Raid_t *, int);
326 int rf_set_rootpartition(RF_Raid_t *, int);
327 void rf_release_all_vps(RF_ConfigSet_t *);
328 void rf_cleanup_config_set(RF_ConfigSet_t *);
329 int rf_have_enough_components(RF_ConfigSet_t *);
330 int rf_auto_config_set(RF_ConfigSet_t *, int *);
331
332 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
333 allow autoconfig to take place.
334 Note that this is overridden by having
335 RAID_AUTOCONFIG as an option in the
336 kernel config file. */
337
/*
 * raidattach: pseudo-device attach routine, called once at boot with
 * the number of RAID units configured in the kernel.  Allocates and
 * initializes all global driver state: the RF_Raid_t descriptor array
 * (raidPtrs), the component buffer pool, the softc array, and the
 * fake "struct device" array used when a RAID set becomes the root
 * device.  Finally registers rf_autoconfig() as a config finalizer.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	/* Mutex guarding the spare-table request/response queues. */
	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	/*
	 * Fake up an array of struct device so that an autoconfigured
	 * RAID set can be nominated as the boot device: booted_device
	 * points into this array (see rf_buildroothack()).
	 */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		raidrootdev[raidID].dv_class  = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit   = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags  = 0;
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Shrink numraid so the units already set up stay
			   usable; the remaining ones simply don't exist. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
430
431 int
432 rf_autoconfig(struct device *self)
433 {
434 RF_AutoConfig_t *ac_list;
435 RF_ConfigSet_t *config_sets;
436
437 if (raidautoconfig == 0)
438 return (0);
439
440 /* XXX This code can only be run once. */
441 raidautoconfig = 0;
442
443 /* 1. locate all RAID components on the system */
444 #ifdef DEBUG
445 printf("Searching for RAID components...\n");
446 #endif
447 ac_list = rf_find_raid_components();
448
449 /* 2. Sort them into their respective sets. */
450 config_sets = rf_create_auto_sets(ac_list);
451
452 /*
453 * 3. Evaluate each set andconfigure the valid ones.
454 * This gets done in rf_buildroothack().
455 */
456 rf_buildroothack(config_sets);
457
458 return (1);
459 }
460
461 void
462 rf_buildroothack(RF_ConfigSet_t *config_sets)
463 {
464 RF_ConfigSet_t *cset;
465 RF_ConfigSet_t *next_cset;
466 int retcode;
467 int raidID;
468 int rootID;
469 int num_root;
470
471 rootID = 0;
472 num_root = 0;
473 cset = config_sets;
474 while(cset != NULL ) {
475 next_cset = cset->next;
476 if (rf_have_enough_components(cset) &&
477 cset->ac->clabel->autoconfigure==1) {
478 retcode = rf_auto_config_set(cset,&raidID);
479 if (!retcode) {
480 if (cset->rootable) {
481 rootID = raidID;
482 num_root++;
483 }
484 } else {
485 /* The autoconfig didn't work :( */
486 #if DEBUG
487 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
488 #endif
489 rf_release_all_vps(cset);
490 }
491 } else {
492 /* we're not autoconfiguring this set...
493 release the associated resources */
494 rf_release_all_vps(cset);
495 }
496 /* cleanup */
497 rf_cleanup_config_set(cset);
498 cset = next_cset;
499 }
500
501 /* we found something bootable... */
502
503 if (num_root == 1) {
504 booted_device = &raidrootdev[rootID];
505 } else if (num_root > 1) {
506 /* we can't guess.. require the user to answer... */
507 boothowto |= RB_ASKNAME;
508 }
509 }
510
511
512 int
513 raidsize(dev_t dev)
514 {
515 struct raid_softc *rs;
516 struct disklabel *lp;
517 int part, unit, omask, size;
518
519 unit = raidunit(dev);
520 if (unit >= numraid)
521 return (-1);
522 rs = &raid_softc[unit];
523
524 if ((rs->sc_flags & RAIDF_INITED) == 0)
525 return (-1);
526
527 part = DISKPART(dev);
528 omask = rs->sc_dkdev.dk_openmask & (1 << part);
529 lp = rs->sc_dkdev.dk_label;
530
531 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
532 return (-1);
533
534 if (lp->d_partitions[part].p_fstype != FS_SWAP)
535 size = -1;
536 else
537 size = lp->d_partitions[part].p_size *
538 (lp->d_secsize / DEV_BSIZE);
539
540 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
541 return (-1);
542
543 return (size);
544
545 }
546
547 int
548 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
549 {
550 /* Not implemented. */
551 return ENXIO;
552 }
/*
 * raidopen: open a partition of a RAID unit.
 *
 * Takes the per-unit lock for the duration.  On the first open of a
 * configured unit the disklabel is re-read and the component labels
 * are marked dirty (so an unclean shutdown is detectable later).
 * Verifies that a non-raw partition actually exists, then records the
 * open in the disk(9) character/block open masks.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* First open of a configured unit: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		/* The RAIDF_INITED test short-circuits before lp is
		   consulted, so lp is only dereferenced for a
		   configured unit. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/*
 * raidclose: close a partition of a RAID unit.
 *
 * Clears the partition's bit in the disk(9) open masks under the
 * per-unit lock.  On the last close of a configured unit the
 * component labels are rewritten as clean; if the system is shutting
 * down, the RAID set itself is shut down and the disk detached.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);
			/* NOTE(review): rf_Shutdown()'s result lands in
			   'error' but the function returns 0 below, so a
			   shutdown failure is silently dropped -- confirm
			   this is intentional. */

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
680
681 void
682 raidstrategy(struct buf *bp)
683 {
684 int s;
685
686 unsigned int raidID = raidunit(bp->b_dev);
687 RF_Raid_t *raidPtr;
688 struct raid_softc *rs = &raid_softc[raidID];
689 int wlabel;
690
691 if ((rs->sc_flags & RAIDF_INITED) ==0) {
692 bp->b_error = ENXIO;
693 bp->b_flags |= B_ERROR;
694 bp->b_resid = bp->b_bcount;
695 biodone(bp);
696 return;
697 }
698 if (raidID >= numraid || !raidPtrs[raidID]) {
699 bp->b_error = ENODEV;
700 bp->b_flags |= B_ERROR;
701 bp->b_resid = bp->b_bcount;
702 biodone(bp);
703 return;
704 }
705 raidPtr = raidPtrs[raidID];
706 if (!raidPtr->valid) {
707 bp->b_error = ENODEV;
708 bp->b_flags |= B_ERROR;
709 bp->b_resid = bp->b_bcount;
710 biodone(bp);
711 return;
712 }
713 if (bp->b_bcount == 0) {
714 db1_printf(("b_bcount is zero..\n"));
715 biodone(bp);
716 return;
717 }
718
719 /*
720 * Do bounds checking and adjust transfer. If there's an
721 * error, the bounds check will flag that for us.
722 */
723
724 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
725 if (DISKPART(bp->b_dev) != RAW_PART)
726 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
727 db1_printf(("Bounds check failed!!:%d %d\n",
728 (int) bp->b_blkno, (int) wlabel));
729 biodone(bp);
730 return;
731 }
732 s = splbio();
733
734 bp->b_resid = 0;
735
736 /* stuff it onto our queue */
737 BUFQ_PUT(&rs->buf_queue, bp);
738
739 raidstart(raidPtrs[raidID]);
740
741 splx(s);
742 }
743 /* ARGSUSED */
744 int
745 raidread(dev_t dev, struct uio *uio, int flags)
746 {
747 int unit = raidunit(dev);
748 struct raid_softc *rs;
749
750 if (unit >= numraid)
751 return (ENXIO);
752 rs = &raid_softc[unit];
753
754 if ((rs->sc_flags & RAIDF_INITED) == 0)
755 return (ENXIO);
756
757 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
758
759 }
760 /* ARGSUSED */
761 int
762 raidwrite(dev_t dev, struct uio *uio, int flags)
763 {
764 int unit = raidunit(dev);
765 struct raid_softc *rs;
766
767 if (unit >= numraid)
768 return (ENXIO);
769 rs = &raid_softc[unit];
770
771 if ((rs->sc_flags & RAIDF_INITED) == 0)
772 return (ENXIO);
773
774 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
775
776 }
777
778 int
779 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
780 {
781 int unit = raidunit(dev);
782 int error = 0;
783 int part, pmask;
784 struct raid_softc *rs;
785 RF_Config_t *k_cfg, *u_cfg;
786 RF_Raid_t *raidPtr;
787 RF_RaidDisk_t *diskPtr;
788 RF_AccTotals_t *totals;
789 RF_DeviceConfig_t *d_cfg, **ucfgp;
790 u_char *specific_buf;
791 int retcode = 0;
792 int column;
793 int raidid;
794 struct rf_recon_req *rrcopy, *rr;
795 RF_ComponentLabel_t *clabel;
796 RF_ComponentLabel_t ci_label;
797 RF_ComponentLabel_t **clabel_ptr;
798 RF_SingleComponent_t *sparePtr,*componentPtr;
799 RF_SingleComponent_t hot_spare;
800 RF_SingleComponent_t component;
801 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
802 int i, j, d;
803 #ifdef __HAVE_OLD_DISKLABEL
804 struct disklabel newlabel;
805 #endif
806
807 if (unit >= numraid)
808 return (ENXIO);
809 rs = &raid_softc[unit];
810 raidPtr = raidPtrs[unit];
811
812 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
813 (int) DISKPART(dev), (int) unit, (int) cmd));
814
815 /* Must be open for writes for these commands... */
816 switch (cmd) {
817 case DIOCSDINFO:
818 case DIOCWDINFO:
819 #ifdef __HAVE_OLD_DISKLABEL
820 case ODIOCWDINFO:
821 case ODIOCSDINFO:
822 #endif
823 case DIOCWLABEL:
824 if ((flag & FWRITE) == 0)
825 return (EBADF);
826 }
827
828 /* Must be initialized for these... */
829 switch (cmd) {
830 case DIOCGDINFO:
831 case DIOCSDINFO:
832 case DIOCWDINFO:
833 #ifdef __HAVE_OLD_DISKLABEL
834 case ODIOCGDINFO:
835 case ODIOCWDINFO:
836 case ODIOCSDINFO:
837 case ODIOCGDEFLABEL:
838 #endif
839 case DIOCGPART:
840 case DIOCWLABEL:
841 case DIOCGDEFLABEL:
842 case RAIDFRAME_SHUTDOWN:
843 case RAIDFRAME_REWRITEPARITY:
844 case RAIDFRAME_GET_INFO:
845 case RAIDFRAME_RESET_ACCTOTALS:
846 case RAIDFRAME_GET_ACCTOTALS:
847 case RAIDFRAME_KEEP_ACCTOTALS:
848 case RAIDFRAME_GET_SIZE:
849 case RAIDFRAME_FAIL_DISK:
850 case RAIDFRAME_COPYBACK:
851 case RAIDFRAME_CHECK_RECON_STATUS:
852 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
853 case RAIDFRAME_GET_COMPONENT_LABEL:
854 case RAIDFRAME_SET_COMPONENT_LABEL:
855 case RAIDFRAME_ADD_HOT_SPARE:
856 case RAIDFRAME_REMOVE_HOT_SPARE:
857 case RAIDFRAME_INIT_LABELS:
858 case RAIDFRAME_REBUILD_IN_PLACE:
859 case RAIDFRAME_CHECK_PARITY:
860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
862 case RAIDFRAME_CHECK_COPYBACK_STATUS:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
864 case RAIDFRAME_SET_AUTOCONFIG:
865 case RAIDFRAME_SET_ROOT:
866 case RAIDFRAME_DELETE_COMPONENT:
867 case RAIDFRAME_INCORPORATE_HOT_SPARE:
868 if ((rs->sc_flags & RAIDF_INITED) == 0)
869 return (ENXIO);
870 }
871
872 switch (cmd) {
873
874 /* configure the system */
875 case RAIDFRAME_CONFIGURE:
876
877 if (raidPtr->valid) {
878 /* There is a valid RAID set running on this unit! */
879 printf("raid%d: Device already configured!\n",unit);
880 return(EINVAL);
881 }
882
883 /* copy-in the configuration information */
884 /* data points to a pointer to the configuration structure */
885
886 u_cfg = *((RF_Config_t **) data);
887 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
888 if (k_cfg == NULL) {
889 return (ENOMEM);
890 }
891 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
892 if (retcode) {
893 RF_Free(k_cfg, sizeof(RF_Config_t));
894 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
895 retcode));
896 return (retcode);
897 }
898 /* allocate a buffer for the layout-specific data, and copy it
899 * in */
900 if (k_cfg->layoutSpecificSize) {
901 if (k_cfg->layoutSpecificSize > 10000) {
902 /* sanity check */
903 RF_Free(k_cfg, sizeof(RF_Config_t));
904 return (EINVAL);
905 }
906 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
907 (u_char *));
908 if (specific_buf == NULL) {
909 RF_Free(k_cfg, sizeof(RF_Config_t));
910 return (ENOMEM);
911 }
912 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
913 k_cfg->layoutSpecificSize);
914 if (retcode) {
915 RF_Free(k_cfg, sizeof(RF_Config_t));
916 RF_Free(specific_buf,
917 k_cfg->layoutSpecificSize);
918 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
919 retcode));
920 return (retcode);
921 }
922 } else
923 specific_buf = NULL;
924 k_cfg->layoutSpecific = specific_buf;
925
926 /* should do some kind of sanity check on the configuration.
927 * Store the sum of all the bytes in the last byte? */
928
929 /* configure the system */
930
931 /*
932 * Clear the entire RAID descriptor, just to make sure
933 * there is no stale data left in the case of a
934 * reconfiguration
935 */
936 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
937 raidPtr->raidid = unit;
938
939 retcode = rf_Configure(raidPtr, k_cfg, NULL);
940
941 if (retcode == 0) {
942
943 /* allow this many simultaneous IO's to
944 this RAID device */
945 raidPtr->openings = RAIDOUTSTANDING;
946
947 raidinit(raidPtr);
948 rf_markalldirty(raidPtr);
949 }
950 /* free the buffers. No return code here. */
951 if (k_cfg->layoutSpecificSize) {
952 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
953 }
954 RF_Free(k_cfg, sizeof(RF_Config_t));
955
956 return (retcode);
957
958 /* shutdown the system */
959 case RAIDFRAME_SHUTDOWN:
960
961 if ((error = raidlock(rs)) != 0)
962 return (error);
963
964 /*
965 * If somebody has a partition mounted, we shouldn't
966 * shutdown.
967 */
968
969 part = DISKPART(dev);
970 pmask = (1 << part);
971 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
972 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
973 (rs->sc_dkdev.dk_copenmask & pmask))) {
974 raidunlock(rs);
975 return (EBUSY);
976 }
977
978 retcode = rf_Shutdown(raidPtr);
979
980 /* It's no longer initialized... */
981 rs->sc_flags &= ~RAIDF_INITED;
982
983 /* Detach the disk. */
984 disk_detach(&rs->sc_dkdev);
985
986 raidunlock(rs);
987
988 return (retcode);
989 case RAIDFRAME_GET_COMPONENT_LABEL:
990 clabel_ptr = (RF_ComponentLabel_t **) data;
991 /* need to read the component label for the disk indicated
992 by row,column in clabel */
993
	   /* For practice, let's get it directly from disk, rather
995 than from the in-core copy */
996 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
997 (RF_ComponentLabel_t *));
998 if (clabel == NULL)
999 return (ENOMEM);
1000
1001 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1002
1003 retcode = copyin( *clabel_ptr, clabel,
1004 sizeof(RF_ComponentLabel_t));
1005
1006 if (retcode) {
1007 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1008 return(retcode);
1009 }
1010
1011 clabel->row = 0; /* Don't allow looking at anything else.*/
1012
1013 column = clabel->column;
1014
1015 if ((column < 0) || (column >= raidPtr->numCol +
1016 raidPtr->numSpare)) {
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return(EINVAL);
1019 }
1020
1021 raidread_component_label(raidPtr->Disks[column].dev,
1022 raidPtr->raid_cinfo[column].ci_vp,
1023 clabel );
1024
1025 retcode = copyout(clabel, *clabel_ptr,
1026 sizeof(RF_ComponentLabel_t));
1027 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1028 return (retcode);
1029
1030 case RAIDFRAME_SET_COMPONENT_LABEL:
1031 clabel = (RF_ComponentLabel_t *) data;
1032
1033 /* XXX check the label for valid stuff... */
1034 /* Note that some things *should not* get modified --
1035 the user should be re-initing the labels instead of
1036 trying to patch things.
1037 */
1038
1039 raidid = raidPtr->raidid;
1040 printf("raid%d: Got component label:\n", raidid);
1041 printf("raid%d: Version: %d\n", raidid, clabel->version);
1042 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1043 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1044 printf("raid%d: Column: %d\n", raidid, clabel->column);
1045 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1046 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1047 printf("raid%d: Status: %d\n", raidid, clabel->status);
1048
1049 clabel->row = 0;
1050 column = clabel->column;
1051
1052 if ((column < 0) || (column >= raidPtr->numCol)) {
1053 return(EINVAL);
1054 }
1055
1056 /* XXX this isn't allowed to do anything for now :-) */
1057
1058 /* XXX and before it is, we need to fill in the rest
1059 of the fields!?!?!?! */
1060 #if 0
1061 raidwrite_component_label(
1062 raidPtr->Disks[column].dev,
1063 raidPtr->raid_cinfo[column].ci_vp,
1064 clabel );
1065 #endif
1066 return (0);
1067
1068 case RAIDFRAME_INIT_LABELS:
1069 clabel = (RF_ComponentLabel_t *) data;
1070 /*
1071 we only want the serial number from
1072 the above. We get all the rest of the information
1073 from the config that was used to create this RAID
1074 set.
1075 */
1076
1077 raidPtr->serial_number = clabel->serial_number;
1078
1079 raid_init_component_label(raidPtr, &ci_label);
1080 ci_label.serial_number = clabel->serial_number;
1081 ci_label.row = 0; /* we dont' pretend to support more */
1082
1083 for(column=0;column<raidPtr->numCol;column++) {
1084 diskPtr = &raidPtr->Disks[column];
1085 if (!RF_DEAD_DISK(diskPtr->status)) {
1086 ci_label.partitionSize = diskPtr->partitionSize;
1087 ci_label.column = column;
1088 raidwrite_component_label(
1089 raidPtr->Disks[column].dev,
1090 raidPtr->raid_cinfo[column].ci_vp,
1091 &ci_label );
1092 }
1093 }
1094
1095 return (retcode);
1096 case RAIDFRAME_SET_AUTOCONFIG:
1097 d = rf_set_autoconfig(raidPtr, *(int *) data);
1098 printf("raid%d: New autoconfig value is: %d\n",
1099 raidPtr->raidid, d);
1100 *(int *) data = d;
1101 return (retcode);
1102
1103 case RAIDFRAME_SET_ROOT:
1104 d = rf_set_rootpartition(raidPtr, *(int *) data);
1105 printf("raid%d: New rootpartition value is: %d\n",
1106 raidPtr->raidid, d);
1107 *(int *) data = d;
1108 return (retcode);
1109
1110 /* initialize all parity */
1111 case RAIDFRAME_REWRITEPARITY:
1112
1113 if (raidPtr->Layout.map->faultsTolerated == 0) {
1114 /* Parity for RAID 0 is trivially correct */
1115 raidPtr->parity_good = RF_RAID_CLEAN;
1116 return(0);
1117 }
1118
1119 if (raidPtr->parity_rewrite_in_progress == 1) {
1120 /* Re-write is already in progress! */
1121 return(EINVAL);
1122 }
1123
1124 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1125 rf_RewriteParityThread,
1126 raidPtr,"raid_parity");
1127 return (retcode);
1128
1129
1130 case RAIDFRAME_ADD_HOT_SPARE:
1131 sparePtr = (RF_SingleComponent_t *) data;
1132 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1133 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1134 return(retcode);
1135
1136 case RAIDFRAME_REMOVE_HOT_SPARE:
1137 return(retcode);
1138
1139 case RAIDFRAME_DELETE_COMPONENT:
1140 componentPtr = (RF_SingleComponent_t *)data;
1141 memcpy( &component, componentPtr,
1142 sizeof(RF_SingleComponent_t));
1143 retcode = rf_delete_component(raidPtr, &component);
1144 return(retcode);
1145
1146 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1147 componentPtr = (RF_SingleComponent_t *)data;
1148 memcpy( &component, componentPtr,
1149 sizeof(RF_SingleComponent_t));
1150 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1151 return(retcode);
1152
1153 case RAIDFRAME_REBUILD_IN_PLACE:
1154
1155 if (raidPtr->Layout.map->faultsTolerated == 0) {
1156 /* Can't do this on a RAID 0!! */
1157 return(EINVAL);
1158 }
1159
1160 if (raidPtr->recon_in_progress == 1) {
1161 /* a reconstruct is already in progress! */
1162 return(EINVAL);
1163 }
1164
1165 componentPtr = (RF_SingleComponent_t *) data;
1166 memcpy( &component, componentPtr,
1167 sizeof(RF_SingleComponent_t));
1168 component.row = 0; /* we don't support any more */
1169 column = component.column;
1170
1171 if ((column < 0) || (column >= raidPtr->numCol)) {
1172 return(EINVAL);
1173 }
1174
1175 RF_LOCK_MUTEX(raidPtr->mutex);
1176 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1177 (raidPtr->numFailures > 0)) {
1178 /* XXX 0 above shouldn't be constant!!! */
1179 /* some component other than this has failed.
1180 Let's not make things worse than they already
1181 are... */
1182 printf("raid%d: Unable to reconstruct to disk at:\n",
1183 raidPtr->raidid);
1184 printf("raid%d: Col: %d Too many failures.\n",
1185 raidPtr->raidid, column);
1186 RF_UNLOCK_MUTEX(raidPtr->mutex);
1187 return (EINVAL);
1188 }
1189 if (raidPtr->Disks[column].status ==
1190 rf_ds_reconstructing) {
1191 printf("raid%d: Unable to reconstruct to disk at:\n",
1192 raidPtr->raidid);
1193 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1194
1195 RF_UNLOCK_MUTEX(raidPtr->mutex);
1196 return (EINVAL);
1197 }
1198 if (raidPtr->Disks[column].status == rf_ds_spared) {
1199 RF_UNLOCK_MUTEX(raidPtr->mutex);
1200 return (EINVAL);
1201 }
1202 RF_UNLOCK_MUTEX(raidPtr->mutex);
1203
1204 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1205 if (rrcopy == NULL)
1206 return(ENOMEM);
1207
1208 rrcopy->raidPtr = (void *) raidPtr;
1209 rrcopy->col = column;
1210
1211 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1212 rf_ReconstructInPlaceThread,
1213 rrcopy,"raid_reconip");
1214 return(retcode);
1215
1216 case RAIDFRAME_GET_INFO:
1217 if (!raidPtr->valid)
1218 return (ENODEV);
1219 ucfgp = (RF_DeviceConfig_t **) data;
1220 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1221 (RF_DeviceConfig_t *));
1222 if (d_cfg == NULL)
1223 return (ENOMEM);
1224 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1225 d_cfg->rows = 1; /* there is only 1 row now */
1226 d_cfg->cols = raidPtr->numCol;
1227 d_cfg->ndevs = raidPtr->numCol;
1228 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1229 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1230 return (ENOMEM);
1231 }
1232 d_cfg->nspares = raidPtr->numSpare;
1233 if (d_cfg->nspares >= RF_MAX_DISKS) {
1234 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1235 return (ENOMEM);
1236 }
1237 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1238 d = 0;
1239 for (j = 0; j < d_cfg->cols; j++) {
1240 d_cfg->devs[d] = raidPtr->Disks[j];
1241 d++;
1242 }
1243 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1244 d_cfg->spares[i] = raidPtr->Disks[j];
1245 }
1246 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1247 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1248
1249 return (retcode);
1250
1251 case RAIDFRAME_CHECK_PARITY:
1252 *(int *) data = raidPtr->parity_good;
1253 return (0);
1254
1255 case RAIDFRAME_RESET_ACCTOTALS:
1256 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1257 return (0);
1258
1259 case RAIDFRAME_GET_ACCTOTALS:
1260 totals = (RF_AccTotals_t *) data;
1261 *totals = raidPtr->acc_totals;
1262 return (0);
1263
1264 case RAIDFRAME_KEEP_ACCTOTALS:
1265 raidPtr->keep_acc_totals = *(int *)data;
1266 return (0);
1267
1268 case RAIDFRAME_GET_SIZE:
1269 *(int *) data = raidPtr->totalSectors;
1270 return (0);
1271
1272 /* fail a disk & optionally start reconstruction */
1273 case RAIDFRAME_FAIL_DISK:
1274
1275 if (raidPtr->Layout.map->faultsTolerated == 0) {
1276 /* Can't do this on a RAID 0!! */
1277 return(EINVAL);
1278 }
1279
1280 rr = (struct rf_recon_req *) data;
1281 rr->row = 0;
1282 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1283 return (EINVAL);
1284
1285
1286 RF_LOCK_MUTEX(raidPtr->mutex);
1287 if ((raidPtr->Disks[rr->col].status ==
1288 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1289 /* some other component has failed. Let's not make
1290 things worse. XXX wrong for RAID6 */
1291 RF_UNLOCK_MUTEX(raidPtr->mutex);
1292 return (EINVAL);
1293 }
1294 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1295 /* Can't fail a spared disk! */
1296 RF_UNLOCK_MUTEX(raidPtr->mutex);
1297 return (EINVAL);
1298 }
1299 RF_UNLOCK_MUTEX(raidPtr->mutex);
1300
1301 /* make a copy of the recon request so that we don't rely on
1302 * the user's buffer */
1303 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1304 if (rrcopy == NULL)
1305 return(ENOMEM);
1306 memcpy(rrcopy, rr, sizeof(*rr));
1307 rrcopy->raidPtr = (void *) raidPtr;
1308
1309 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1310 rf_ReconThread,
1311 rrcopy,"raid_recon");
1312 return (0);
1313
1314 /* invoke a copyback operation after recon on whatever disk
1315 * needs it, if any */
1316 case RAIDFRAME_COPYBACK:
1317
1318 if (raidPtr->Layout.map->faultsTolerated == 0) {
1319 /* This makes no sense on a RAID 0!! */
1320 return(EINVAL);
1321 }
1322
1323 if (raidPtr->copyback_in_progress == 1) {
1324 /* Copyback is already in progress! */
1325 return(EINVAL);
1326 }
1327
1328 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1329 rf_CopybackThread,
1330 raidPtr,"raid_copyback");
1331 return (retcode);
1332
1333 /* return the percentage completion of reconstruction */
1334 case RAIDFRAME_CHECK_RECON_STATUS:
1335 if (raidPtr->Layout.map->faultsTolerated == 0) {
1336 /* This makes no sense on a RAID 0, so tell the
1337 user it's done. */
1338 *(int *) data = 100;
1339 return(0);
1340 }
1341 if (raidPtr->status != rf_rs_reconstructing)
1342 *(int *) data = 100;
1343 else
1344 *(int *) data = raidPtr->reconControl->percentComplete;
1345 return (0);
1346 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1347 progressInfoPtr = (RF_ProgressInfo_t **) data;
1348 if (raidPtr->status != rf_rs_reconstructing) {
1349 progressInfo.remaining = 0;
1350 progressInfo.completed = 100;
1351 progressInfo.total = 100;
1352 } else {
1353 progressInfo.total =
1354 raidPtr->reconControl->numRUsTotal;
1355 progressInfo.completed =
1356 raidPtr->reconControl->numRUsComplete;
1357 progressInfo.remaining = progressInfo.total -
1358 progressInfo.completed;
1359 }
1360 retcode = copyout(&progressInfo, *progressInfoPtr,
1361 sizeof(RF_ProgressInfo_t));
1362 return (retcode);
1363
1364 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1365 if (raidPtr->Layout.map->faultsTolerated == 0) {
1366 /* This makes no sense on a RAID 0, so tell the
1367 user it's done. */
1368 *(int *) data = 100;
1369 return(0);
1370 }
1371 if (raidPtr->parity_rewrite_in_progress == 1) {
1372 *(int *) data = 100 *
1373 raidPtr->parity_rewrite_stripes_done /
1374 raidPtr->Layout.numStripe;
1375 } else {
1376 *(int *) data = 100;
1377 }
1378 return (0);
1379
1380 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1381 progressInfoPtr = (RF_ProgressInfo_t **) data;
1382 if (raidPtr->parity_rewrite_in_progress == 1) {
1383 progressInfo.total = raidPtr->Layout.numStripe;
1384 progressInfo.completed =
1385 raidPtr->parity_rewrite_stripes_done;
1386 progressInfo.remaining = progressInfo.total -
1387 progressInfo.completed;
1388 } else {
1389 progressInfo.remaining = 0;
1390 progressInfo.completed = 100;
1391 progressInfo.total = 100;
1392 }
1393 retcode = copyout(&progressInfo, *progressInfoPtr,
1394 sizeof(RF_ProgressInfo_t));
1395 return (retcode);
1396
1397 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1398 if (raidPtr->Layout.map->faultsTolerated == 0) {
1399 /* This makes no sense on a RAID 0 */
1400 *(int *) data = 100;
1401 return(0);
1402 }
1403 if (raidPtr->copyback_in_progress == 1) {
1404 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1405 raidPtr->Layout.numStripe;
1406 } else {
1407 *(int *) data = 100;
1408 }
1409 return (0);
1410
1411 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1412 progressInfoPtr = (RF_ProgressInfo_t **) data;
1413 if (raidPtr->copyback_in_progress == 1) {
1414 progressInfo.total = raidPtr->Layout.numStripe;
1415 progressInfo.completed =
1416 raidPtr->copyback_stripes_done;
1417 progressInfo.remaining = progressInfo.total -
1418 progressInfo.completed;
1419 } else {
1420 progressInfo.remaining = 0;
1421 progressInfo.completed = 100;
1422 progressInfo.total = 100;
1423 }
1424 retcode = copyout(&progressInfo, *progressInfoPtr,
1425 sizeof(RF_ProgressInfo_t));
1426 return (retcode);
1427
1428 /* the sparetable daemon calls this to wait for the kernel to
1429 * need a spare table. this ioctl does not return until a
1430 * spare table is needed. XXX -- calling mpsleep here in the
1431 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1432 * -- I should either compute the spare table in the kernel,
1433 * or have a different -- XXX XXX -- interface (a different
1434 * character device) for delivering the table -- XXX */
1435 #if 0
1436 case RAIDFRAME_SPARET_WAIT:
1437 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1438 while (!rf_sparet_wait_queue)
1439 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1440 waitreq = rf_sparet_wait_queue;
1441 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1442 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1443
1444 /* structure assignment */
1445 *((RF_SparetWait_t *) data) = *waitreq;
1446
1447 RF_Free(waitreq, sizeof(*waitreq));
1448 return (0);
1449
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1452 case RAIDFRAME_ABORT_SPARET_WAIT:
1453 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1454 waitreq->fcol = -1;
1455 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1456 waitreq->next = rf_sparet_wait_queue;
1457 rf_sparet_wait_queue = waitreq;
1458 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1459 wakeup(&rf_sparet_wait_queue);
1460 return (0);
1461
1462 /* used by the spare table daemon to deliver a spare table
1463 * into the kernel */
1464 case RAIDFRAME_SEND_SPARET:
1465
1466 /* install the spare table */
1467 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1468
1469 /* respond to the requestor. the return status of the spare
1470 * table installation is passed in the "fcol" field */
1471 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1472 waitreq->fcol = retcode;
1473 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1474 waitreq->next = rf_sparet_resp_queue;
1475 rf_sparet_resp_queue = waitreq;
1476 wakeup(&rf_sparet_resp_queue);
1477 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1478
1479 return (retcode);
1480 #endif
1481
1482 default:
1483 break; /* fall through to the os-specific code below */
1484
1485 }
1486
1487 if (!raidPtr->valid)
1488 return (EINVAL);
1489
1490 /*
1491 * Add support for "regular" device ioctls here.
1492 */
1493
1494 switch (cmd) {
1495 case DIOCGDINFO:
1496 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1497 break;
1498 #ifdef __HAVE_OLD_DISKLABEL
1499 case ODIOCGDINFO:
1500 newlabel = *(rs->sc_dkdev.dk_label);
1501 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1502 return ENOTTY;
1503 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1504 break;
1505 #endif
1506
1507 case DIOCGPART:
1508 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1509 ((struct partinfo *) data)->part =
1510 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1511 break;
1512
1513 case DIOCWDINFO:
1514 case DIOCSDINFO:
1515 #ifdef __HAVE_OLD_DISKLABEL
1516 case ODIOCWDINFO:
1517 case ODIOCSDINFO:
1518 #endif
1519 {
1520 struct disklabel *lp;
1521 #ifdef __HAVE_OLD_DISKLABEL
1522 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1523 memset(&newlabel, 0, sizeof newlabel);
1524 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1525 lp = &newlabel;
1526 } else
1527 #endif
1528 lp = (struct disklabel *)data;
1529
1530 if ((error = raidlock(rs)) != 0)
1531 return (error);
1532
1533 rs->sc_flags |= RAIDF_LABELLING;
1534
1535 error = setdisklabel(rs->sc_dkdev.dk_label,
1536 lp, 0, rs->sc_dkdev.dk_cpulabel);
1537 if (error == 0) {
1538 if (cmd == DIOCWDINFO
1539 #ifdef __HAVE_OLD_DISKLABEL
1540 || cmd == ODIOCWDINFO
1541 #endif
1542 )
1543 error = writedisklabel(RAIDLABELDEV(dev),
1544 raidstrategy, rs->sc_dkdev.dk_label,
1545 rs->sc_dkdev.dk_cpulabel);
1546 }
1547 rs->sc_flags &= ~RAIDF_LABELLING;
1548
1549 raidunlock(rs);
1550
1551 if (error)
1552 return (error);
1553 break;
1554 }
1555
1556 case DIOCWLABEL:
1557 if (*(int *) data != 0)
1558 rs->sc_flags |= RAIDF_WLABEL;
1559 else
1560 rs->sc_flags &= ~RAIDF_WLABEL;
1561 break;
1562
1563 case DIOCGDEFLABEL:
1564 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1565 break;
1566
1567 #ifdef __HAVE_OLD_DISKLABEL
1568 case ODIOCGDEFLABEL:
1569 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1570 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1571 return ENOTTY;
1572 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1573 break;
1574 #endif
1575
1576 default:
1577 retcode = ENOTTY;
1578 }
1579 return (retcode);
1580
1581 }
1582
1583
1584 /* raidinit -- complete the rest of the initialization for the
1585 RAIDframe device. */
1586
1587
1588 static void
1589 raidinit(RF_Raid_t *raidPtr)
1590 {
1591 struct raid_softc *rs;
1592 int unit;
1593
1594 unit = raidPtr->raidid;
1595
1596 rs = &raid_softc[unit];
1597
1598 /* XXX should check return code first... */
1599 rs->sc_flags |= RAIDF_INITED;
1600
1601 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1602
1603 rs->sc_dkdev.dk_name = rs->sc_xname;
1604
1605 /* disk_attach actually creates space for the CPU disklabel, among
1606 * other things, so it's critical to call this *BEFORE* we try putzing
1607 * with disklabels. */
1608
1609 disk_attach(&rs->sc_dkdev);
1610
1611 /* XXX There may be a weird interaction here between this, and
1612 * protectedSectors, as used in RAIDframe. */
1613
1614 rs->sc_size = raidPtr->totalSectors;
1615
1616 }
1617 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1618 /* wake up the daemon & tell it to get us a spare table
1619 * XXX
1620 * the entries in the queues should be tagged with the raidPtr
1621 * so that in the extremely rare case that two recons happen at once,
1622 * we know for which device were requesting a spare table
1623 * XXX
1624 *
1625 * XXX This code is not currently used. GO
1626 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Push the request onto the wait queue and poke the sparetable
	 * daemon, which is sleeping on rf_sparet_wait_queue. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* Wait for the daemon's response to show up on the response
	 * queue.  NOTE(review): an older comment here claimed "mpsleep
	 * unlocks the mutex", but tsleep() does not release
	 * rf_sparet_wait_mutex -- confirm the daemon side can still make
	 * progress while we sleep here. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1651 #endif
1652
1653 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1654 * bp & passes it down.
1655 * any calls originating in the kernel must use non-blocking I/O
1656 * do some extra sanity checking to return "appropriate" error values for
1657 * certain conditions (to make some standard utilities work)
1658 *
1659 * Formerly known as: rf_DoAccessKernel
1660 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex across the component-label update --
		 * it does I/O of its own */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held when the condition is
	 * evaluated and is released while each request is examined and
	 * dispatched. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb is 1 when the byte count is not an exact multiple of
		 * the sector size, so a partial trailing sector still
		 * counts against the device bounds check below */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): the "1 ||" forces this branch; db1_printf
		 * is presumably compiled out in normal builds -- verify. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run off the end of the array, or
		 * whose address arithmetic wrapped around (the "sum <"
		 * comparisons catch overflow). */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* transfers must be a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this request */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
					  RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
					  do_async, raid_addr, num_blocks,
					  bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (bp->b_error) {
			bp->b_flags |= B_ERROR;
		}

		/* re-take the mutex for the next loop-condition check */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1779
1780
1781
1782
1783 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1784
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	/* remember which queue this request belongs to, for completion */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* a private buf is used for the component I/O so the caller's bp
	 * is left untouched; PR_NOWAIT because we may be at splbio() */
	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
	if (raidbp == NULL) {
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete the NOP immediately via the normal callback */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* aim the private buf at the component device/vnode */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter before the
		 * strategy call */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1887 /* this is the callback function associated with a I/O invoked from
1888 kernel code.
1889 */
static void
KernelWakeupFunc(struct buf *vbp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the rf_buf embedded at the head of a raidbuf */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;	/* the original, caller-supplied buf */

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component I/O error to the original buf; default
	 * to EIO when the driver flagged an error without a code */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account the physical I/O time in the trace record, if tracing */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* done with the private buf; return it to the pool */
	pool_put(&raidframe_cbufpool, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1969
1970
1971
1972 /*
1973 * initialize a buf structure for doing an I/O in the kernel.
1974 */
1975 static void
1976 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1977 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
1978 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1979 struct proc *b_proc)
1980 {
1981 /* bp->b_flags = B_PHYS | rw_flag; */
1982 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1983 bp->b_bcount = numSect << logBytesPerSector;
1984 bp->b_bufsize = bp->b_bcount;
1985 bp->b_error = 0;
1986 bp->b_dev = dev;
1987 bp->b_data = buf;
1988 bp->b_blkno = startSect;
1989 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1990 if (bp->b_bcount == 0) {
1991 panic("bp->b_bcount is zero in InitBP!!");
1992 }
1993 bp->b_proc = b_proc;
1994 bp->b_iodone = cbFunc;
1995 bp->b_vp = b_vp;
1996
1997 }
1998
1999 static void
2000 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2001 struct disklabel *lp)
2002 {
2003 memset(lp, 0, sizeof(*lp));
2004
2005 /* fabricate a label... */
2006 lp->d_secperunit = raidPtr->totalSectors;
2007 lp->d_secsize = raidPtr->bytesPerSector;
2008 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2009 lp->d_ntracks = 4 * raidPtr->numCol;
2010 lp->d_ncylinders = raidPtr->totalSectors /
2011 (lp->d_nsectors * lp->d_ntracks);
2012 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2013
2014 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2015 lp->d_type = DTYPE_RAID;
2016 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2017 lp->d_rpm = 3600;
2018 lp->d_interleave = 1;
2019 lp->d_flags = 0;
2020
2021 lp->d_partitions[RAW_PART].p_offset = 0;
2022 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2023 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2024 lp->d_npartitions = RAW_PART + 1;
2025
2026 lp->d_magic = DISKMAGIC;
2027 lp->d_magic2 = DISKMAGIC;
2028 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2029
2030 }
2031 /*
2032 * Read the disklabel from the raid device. If one is not present, fake one
2033 * up.
2034 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default, in case nothing usable is
	 * found on disk */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn about any partition that extends past the device */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2087 /*
2088 * Take care of things one might want to take care of in the event
2089 * that a disklabel isn't present.
2090 */
2091 static void
2092 raidmakedisklabel(struct raid_softc *rs)
2093 {
2094 struct disklabel *lp = rs->sc_dkdev.dk_label;
2095 db1_printf(("Making a label..\n"));
2096
2097 /*
2098 * For historical reasons, if there's no disklabel present
2099 * the raw partition must be marked FS_BSDFFS.
2100 */
2101
2102 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2103
2104 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2105
2106 lp->d_checksum = dkcksum(lp);
2107 }
2108 /*
2109 * Lookup the provided name in the filesystem. If the file exists,
2110 * is a valid block device, and isn't being used by anyone else,
2111 * set *vpp to the file's vnode.
2112 * You'll find the original of this in ccd.c
2113 */
2114 int
2115 raidlookup(char *path, struct proc *p, struct vnode **vpp)
2116 {
2117 struct nameidata nd;
2118 struct vnode *vp;
2119 struct vattr va;
2120 int error;
2121
2122 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2123 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2124 return (error);
2125 }
2126 vp = nd.ni_vp;
2127 if (vp->v_usecount > 1) {
2128 VOP_UNLOCK(vp, 0);
2129 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2130 return (EBUSY);
2131 }
2132 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2133 VOP_UNLOCK(vp, 0);
2134 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2135 return (error);
2136 }
2137 /* XXX: eventually we should handle VREG, too. */
2138 if (va.va_type != VBLK) {
2139 VOP_UNLOCK(vp, 0);
2140 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2141 return (ENOTBLK);
2142 }
2143 VOP_UNLOCK(vp, 0);
2144 *vpp = vp;
2145 return (0);
2146 }
2147 /*
2148 * Wait interruptibly for an exclusive lock.
2149 *
2150 * XXX
2151 * Several drivers do this; it should be abstracted and made MP-safe.
2152 * (Hmm... where have we seen this warning before :-> GO )
2153 */
2154 static int
2155 raidlock(struct raid_softc *rs)
2156 {
2157 int error;
2158
2159 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2160 rs->sc_flags |= RAIDF_WANTED;
2161 if ((error =
2162 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2163 return (error);
2164 }
2165 rs->sc_flags |= RAIDF_LOCKED;
2166 return (0);
2167 }
2168 /*
2169 * Unlock and wake up any waiters.
2170 */
2171 static void
2172 raidunlock(struct raid_softc *rs)
2173 {
2174
2175 rs->sc_flags &= ~RAIDF_LOCKED;
2176 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2177 rs->sc_flags &= ~RAIDF_WANTED;
2178 wakeup(rs);
2179 }
2180 }
2181
2182
2183 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2184 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2185
2186 int
2187 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2188 {
2189 RF_ComponentLabel_t clabel;
2190 raidread_component_label(dev, b_vp, &clabel);
2191 clabel.mod_counter = mod_counter;
2192 clabel.clean = RF_RAID_CLEAN;
2193 raidwrite_component_label(dev, b_vp, &clabel);
2194 return(0);
2195 }
2196
2197
2198 int
2199 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2200 {
2201 RF_ComponentLabel_t clabel;
2202 raidread_component_label(dev, b_vp, &clabel);
2203 clabel.mod_counter = mod_counter;
2204 clabel.clean = RF_RAID_DIRTY;
2205 raidwrite_component_label(dev, b_vp, &clabel);
2206 return(0);
2207 }
2208
2209 /* ARGSUSED */
2210 int
2211 raidread_component_label(dev_t dev, struct vnode *b_vp,
2212 RF_ComponentLabel_t *clabel)
2213 {
2214 struct buf *bp;
2215 const struct bdevsw *bdev;
2216 int error;
2217
2218 /* XXX should probably ensure that we don't try to do this if
2219 someone has changed rf_protected_sectors. */
2220
2221 if (b_vp == NULL) {
2222 /* For whatever reason, this component is not valid.
2223 Don't try to read a component label from it. */
2224 return(EINVAL);
2225 }
2226
2227 /* get a block of the appropriate size... */
2228 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2229 bp->b_dev = dev;
2230
2231 /* get our ducks in a row for the read */
2232 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2233 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2234 bp->b_flags |= B_READ;
2235 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2236
2237 bdev = bdevsw_lookup(bp->b_dev);
2238 if (bdev == NULL)
2239 return (ENXIO);
2240 (*bdev->d_strategy)(bp);
2241
2242 error = biowait(bp);
2243
2244 if (!error) {
2245 memcpy(clabel, bp->b_data,
2246 sizeof(RF_ComponentLabel_t));
2247 }
2248
2249 brelse(bp);
2250 return(error);
2251 }
2252 /* ARGSUSED */
2253 int
2254 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2255 RF_ComponentLabel_t *clabel)
2256 {
2257 struct buf *bp;
2258 const struct bdevsw *bdev;
2259 int error;
2260
2261 /* get a block of the appropriate size... */
2262 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2263 bp->b_dev = dev;
2264
2265 /* get our ducks in a row for the write */
2266 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2267 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2268 bp->b_flags |= B_WRITE;
2269 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2270
2271 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2272
2273 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2274
2275 bdev = bdevsw_lookup(bp->b_dev);
2276 if (bdev == NULL)
2277 return (ENXIO);
2278 (*bdev->d_strategy)(bp);
2279 error = biowait(bp);
2280 brelse(bp);
2281 if (error) {
2282 #if 1
2283 printf("Failed to write RAID component info!\n");
2284 #endif
2285 }
2286
2287 return(error);
2288 }
2289
/*
 * Bump the array's mod_counter and mark every live component (and every
 * in-use spare) dirty on disk.  Dead disks are skipped entirely; spared
 * components are read but deliberately left untouched.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Re-stamp the label with current array geometry. */
			raid_init_component_label(raidPtr, &clabel);

			/* NOTE(review): if no column's spareCol matches,
			   scol stays -1 and is written to clabel.column —
			   confirm rf_ds_used_spare guarantees a match. */
			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2360
2361
/*
 * Re-write the component labels of all optimal components and in-use
 * spares with the incremented mod_counter.  If `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the components
 * are additionally marked clean (via raidmarkclean).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[c].dev,
						raidPtr->raid_cinfo[c].ci_vp,
						raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Re-stamp label with current array geometry. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2452
2453 void
2454 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2455 {
2456 struct proc *p;
2457
2458 p = raidPtr->engine_thread;
2459
2460 if (vp != NULL) {
2461 if (auto_configured == 1) {
2462 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2463 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2464 vput(vp);
2465
2466 } else {
2467 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2468 }
2469 }
2470 }
2471
2472
2473 void
2474 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2475 {
2476 int r,c;
2477 struct vnode *vp;
2478 int acd;
2479
2480
2481 /* We take this opportunity to close the vnodes like we should.. */
2482
2483 for (c = 0; c < raidPtr->numCol; c++) {
2484 vp = raidPtr->raid_cinfo[c].ci_vp;
2485 acd = raidPtr->Disks[c].auto_configured;
2486 rf_close_component(raidPtr, vp, acd);
2487 raidPtr->raid_cinfo[c].ci_vp = NULL;
2488 raidPtr->Disks[c].auto_configured = 0;
2489 }
2490
2491 for (r = 0; r < raidPtr->numSpare; r++) {
2492 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2493 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2494 rf_close_component(raidPtr, vp, acd);
2495 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2496 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2497 }
2498 }
2499
2500
/*
 * Kernel-thread body: fail the component named in `req' and (if
 * RF_FDFLAGS_RECON is set) reconstruct it onto a spare.  Runs at
 * splbio() for the duration and exits via kthread_exit().
 * Frees `req' when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	/* flag used by waiters to tell a recon is underway */
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* XXX get rid of this! we don't need it at all.. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2523
/*
 * Kernel-thread body: re-write all parity on the array.  On success the
 * in-core parity_good flag is set to RF_RAID_CLEAN (the on-disk clean
 * bits are written at shutdown).  Wakes any thread sleeping on
 * parity_rewrite_in_progress if a shutdown is pending, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2552
2553
/*
 * Kernel-thread body: copy reconstructed data from the spare back to a
 * replaced component, at splbio(), then exit.  Progress is signalled
 * through copyback_in_progress.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2568
2569
/*
 * Kernel-thread body: reconstruct the component in req->col in place
 * (onto the same device), at splbio().  Frees `req' and exits via
 * kthread_exit().
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2587
/*
 * Scan every disk-class device in the system for partitions of type
 * FS_RAID, read the RAIDframe component label from each, and return a
 * linked list of RF_AutoConfig_t entries for the plausible ones
 * (per rf_reasonable_label()).  Components accepted into the list keep
 * their vnodes open; rejected ones are closed and released here.
 * Returns NULL on allocation failure or if nothing was found.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor;
	int error;
	int i;
	int good_one;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac_list;
	RF_AutoConfig_t *ac;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (dv->dv_class != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
			continue;
		}
		/* fdisa is the Atari/Milan floppy driver */
		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			/* NOTE(review): on this "can't happen" path
			   `label' is still used below — confirm it is
			   always filled in by a faked-up label. */
			printf("can't get label for dev %s%c (%d)!?!?\n",
			       dv->dv_xname, 'a' + RAW_PART, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		/* NOTE(review): opened FREAD but closed FREAD|FWRITE —
		   verify the asymmetry is intentional. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		for (i=0; i < label.d_npartitions; i++) {
			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}

			good_one = 0;

			clabel = (RF_ComponentLabel_t *)
				malloc(sizeof(RF_ComponentLabel_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (clabel == NULL) {
				/* XXX CLEANUP HERE */
				printf("RAID auto config: out of memory!\n");
				return(NULL); /* XXX probably should panic? */
			}

			if (!raidread_component_label(dev, vp, clabel)) {
				/* Got the label.  Does it look reasonable? */
				if (rf_reasonable_label(clabel) &&
				    (clabel->partitionSize <=
				     label.d_partitions[i].p_size)) {
#if DEBUG
					printf("Component on: %s%c: %d\n",
					       dv->dv_xname, 'a'+i,
					       label.d_partitions[i].p_size);
					rf_print_component_label(clabel);
#endif
					/* if it's reasonable, add it,
					   else ignore it. */
					ac = (RF_AutoConfig_t *)
						malloc(sizeof(RF_AutoConfig_t),
						       M_RAIDFRAME,
						       M_NOWAIT);
					if (ac == NULL) {
						/* XXX should panic?? */
						return(NULL);
					}

					sprintf(ac->devname, "%s%c",
						dv->dv_xname, 'a'+i);
					ac->dev = dev;
					ac->vp = vp;
					ac->clabel = clabel;
					ac->next = ac_list;
					ac_list = ac;
					good_one = 1;
				}
			}
			if (!good_one) {
				/* cleanup */
				free(clabel, M_RAIDFRAME);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
			}
		}
	}
	return(ac_list);
}
2740
2741 static int
2742 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2743 {
2744
2745 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2746 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2747 ((clabel->clean == RF_RAID_CLEAN) ||
2748 (clabel->clean == RF_RAID_DIRTY)) &&
2749 clabel->row >=0 &&
2750 clabel->column >= 0 &&
2751 clabel->num_rows > 0 &&
2752 clabel->num_columns > 0 &&
2753 clabel->row < clabel->num_rows &&
2754 clabel->column < clabel->num_columns &&
2755 clabel->blockSize > 0 &&
2756 clabel->numBlocks > 0) {
2757 /* label looks reasonable enough... */
2758 return(1);
2759 }
2760 return(0);
2761 }
2762
2763
#if DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2791
2792 RF_ConfigSet_t *
2793 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2794 {
2795 RF_AutoConfig_t *ac;
2796 RF_ConfigSet_t *config_sets;
2797 RF_ConfigSet_t *cset;
2798 RF_AutoConfig_t *ac_next;
2799
2800
2801 config_sets = NULL;
2802
2803 /* Go through the AutoConfig list, and figure out which components
2804 belong to what sets. */
2805 ac = ac_list;
2806 while(ac!=NULL) {
2807 /* we're going to putz with ac->next, so save it here
2808 for use at the end of the loop */
2809 ac_next = ac->next;
2810
2811 if (config_sets == NULL) {
2812 /* will need at least this one... */
2813 config_sets = (RF_ConfigSet_t *)
2814 malloc(sizeof(RF_ConfigSet_t),
2815 M_RAIDFRAME, M_NOWAIT);
2816 if (config_sets == NULL) {
2817 panic("rf_create_auto_sets: No memory!");
2818 }
2819 /* this one is easy :) */
2820 config_sets->ac = ac;
2821 config_sets->next = NULL;
2822 config_sets->rootable = 0;
2823 ac->next = NULL;
2824 } else {
2825 /* which set does this component fit into? */
2826 cset = config_sets;
2827 while(cset!=NULL) {
2828 if (rf_does_it_fit(cset, ac)) {
2829 /* looks like it matches... */
2830 ac->next = cset->ac;
2831 cset->ac = ac;
2832 break;
2833 }
2834 cset = cset->next;
2835 }
2836 if (cset==NULL) {
2837 /* didn't find a match above... new set..*/
2838 cset = (RF_ConfigSet_t *)
2839 malloc(sizeof(RF_ConfigSet_t),
2840 M_RAIDFRAME, M_NOWAIT);
2841 if (cset == NULL) {
2842 panic("rf_create_auto_sets: No memory!");
2843 }
2844 cset->ac = ac;
2845 ac->next = NULL;
2846 cset->next = config_sets;
2847 cset->rootable = 0;
2848 config_sets = cset;
2849 }
2850 }
2851 ac = ac_next;
2852 }
2853
2854
2855 return(config_sets);
2856 }
2857
2858 static int
2859 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2860 {
2861 RF_ComponentLabel_t *clabel1, *clabel2;
2862
2863 /* If this one matches the *first* one in the set, that's good
2864 enough, since the other members of the set would have been
2865 through here too... */
2866 /* note that we are not checking partitionSize here..
2867
2868 Note that we are also not checking the mod_counters here.
2869 If everything else matches execpt the mod_counter, that's
2870 good enough for this test. We will deal with the mod_counters
2871 a little later in the autoconfiguration process.
2872
2873 (clabel1->mod_counter == clabel2->mod_counter) &&
2874
2875 The reason we don't check for this is that failed disks
2876 will have lower modification counts. If those disks are
2877 not added to the set they used to belong to, then they will
2878 form their own set, which may result in 2 different sets,
2879 for example, competing to be configured at raid0, and
2880 perhaps competing to be the root filesystem set. If the
2881 wrong ones get configured, or both attempt to become /,
2882 weird behaviour and or serious lossage will occur. Thus we
2883 need to bring them into the fold here, and kick them out at
2884 a later point.
2885
2886 */
2887
2888 clabel1 = cset->ac->clabel;
2889 clabel2 = ac->clabel;
2890 if ((clabel1->version == clabel2->version) &&
2891 (clabel1->serial_number == clabel2->serial_number) &&
2892 (clabel1->num_rows == clabel2->num_rows) &&
2893 (clabel1->num_columns == clabel2->num_columns) &&
2894 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2895 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2896 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2897 (clabel1->parityConfig == clabel2->parityConfig) &&
2898 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2899 (clabel1->blockSize == clabel2->blockSize) &&
2900 (clabel1->numBlocks == clabel2->numBlocks) &&
2901 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2902 (clabel1->root_partition == clabel2->root_partition) &&
2903 (clabel1->last_unit == clabel2->last_unit) &&
2904 (clabel1->config_order == clabel2->config_order)) {
2905 /* if it get's here, it almost *has* to be a match */
2906 } else {
2907 /* it's not consistent with somebody in the set..
2908 punt */
2909 return(0);
2910 }
2911 /* all was fine.. it must fit... */
2912 return(1);
2913 }
2914
/*
 * Decide whether the configuration set has enough live components to be
 * configured.  The set's reference mod_counter is the maximum seen on
 * any member; only components carrying that counter count as present.
 *
 * RAID 1 is special-cased: components are treated as (even, odd) pairs,
 * and only losing *both* halves of a pair is fatal.  For RAID 0 any
 * missing component is fatal; for RAID 4/5 more than one is.
 * Returns 1 if configurable, 0 if too many components are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a member claiming column c with the current
		   mod_counter. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3017
/*
 * Build an RF_Config_t from the component labels of an autoconfig set,
 * suitable for passing to rf_Configure().
 *
 * Note the side effect: clabel->num_rows is forced to 1 (in the same
 * assignment that sets config->numRow).  `raidPtr' is currently unused
 * here.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* Map each component's device name into its column slot. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* No debug variables set by autoconfiguration. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3052
3053 int
3054 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3055 {
3056 RF_ComponentLabel_t clabel;
3057 struct vnode *vp;
3058 dev_t dev;
3059 int column;
3060 int sparecol;
3061
3062 raidPtr->autoconfigure = new_value;
3063
3064 for(column=0; column<raidPtr->numCol; column++) {
3065 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3066 dev = raidPtr->Disks[column].dev;
3067 vp = raidPtr->raid_cinfo[column].ci_vp;
3068 raidread_component_label(dev, vp, &clabel);
3069 clabel.autoconfigure = new_value;
3070 raidwrite_component_label(dev, vp, &clabel);
3071 }
3072 }
3073 for(column = 0; column < raidPtr->numSpare ; column++) {
3074 sparecol = raidPtr->numCol + column;
3075 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3076 dev = raidPtr->Disks[sparecol].dev;
3077 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3078 raidread_component_label(dev, vp, &clabel);
3079 clabel.autoconfigure = new_value;
3080 raidwrite_component_label(dev, vp, &clabel);
3081 }
3082 }
3083 return(new_value);
3084 }
3085
3086 int
3087 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3088 {
3089 RF_ComponentLabel_t clabel;
3090 struct vnode *vp;
3091 dev_t dev;
3092 int column;
3093 int sparecol;
3094
3095 raidPtr->root_partition = new_value;
3096 for(column=0; column<raidPtr->numCol; column++) {
3097 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3098 dev = raidPtr->Disks[column].dev;
3099 vp = raidPtr->raid_cinfo[column].ci_vp;
3100 raidread_component_label(dev, vp, &clabel);
3101 clabel.root_partition = new_value;
3102 raidwrite_component_label(dev, vp, &clabel);
3103 }
3104 }
3105 for(column = 0; column < raidPtr->numSpare ; column++) {
3106 sparecol = raidPtr->numCol + column;
3107 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3108 dev = raidPtr->Disks[sparecol].dev;
3109 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3110 raidread_component_label(dev, vp, &clabel);
3111 clabel.root_partition = new_value;
3112 raidwrite_component_label(dev, vp, &clabel);
3113 }
3114 }
3115 return(new_value);
3116 }
3117
3118 void
3119 rf_release_all_vps(RF_ConfigSet_t *cset)
3120 {
3121 RF_AutoConfig_t *ac;
3122
3123 ac = cset->ac;
3124 while(ac!=NULL) {
3125 /* Close the vp, and give it back */
3126 if (ac->vp) {
3127 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3128 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3129 vput(ac->vp);
3130 ac->vp = NULL;
3131 }
3132 ac = ac->next;
3133 }
3134 }
3135
3136
3137 void
3138 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3139 {
3140 RF_AutoConfig_t *ac;
3141 RF_AutoConfig_t *next_ac;
3142
3143 ac = cset->ac;
3144 while(ac!=NULL) {
3145 next_ac = ac->next;
3146 /* nuke the label */
3147 free(ac->clabel, M_RAIDFRAME);
3148 /* cleanup the config structure */
3149 free(ac, M_RAIDFRAME);
3150 /* "next.." */
3151 ac = next_ac;
3152 }
3153 /* and, finally, nuke the config set */
3154 free(cset, M_RAIDFRAME);
3155 }
3156
3157
/*
 * Fill in a component label from the array's current in-core state:
 * identity (serial number, mod counter), geometry (columns, stripe
 * unit layout, block size/count) and configuration flags.  Per-column
 * fields (row/column) and clean status are left for the caller to set;
 * clean is initialized to RF_RAID_DIRTY here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3185
3186 int
3187 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3188 {
3189 RF_Raid_t *raidPtr;
3190 RF_Config_t *config;
3191 int raidID;
3192 int retcode;
3193
3194 #if DEBUG
3195 printf("RAID autoconfigure\n");
3196 #endif
3197
3198 retcode = 0;
3199 *unit = -1;
3200
3201 /* 1. Create a config structure */
3202
3203 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3204 M_RAIDFRAME,
3205 M_NOWAIT);
3206 if (config==NULL) {
3207 printf("Out of mem!?!?\n");
3208 /* XXX do something more intelligent here. */
3209 return(1);
3210 }
3211
3212 memset(config, 0, sizeof(RF_Config_t));
3213
3214 /*
3215 2. Figure out what RAID ID this one is supposed to live at
3216 See if we can get the same RAID dev that it was configured
3217 on last time..
3218 */
3219
3220 raidID = cset->ac->clabel->last_unit;
3221 if ((raidID < 0) || (raidID >= numraid)) {
3222 /* let's not wander off into lala land. */
3223 raidID = numraid - 1;
3224 }
3225 if (raidPtrs[raidID]->valid != 0) {
3226
3227 /*
3228 Nope... Go looking for an alternative...
3229 Start high so we don't immediately use raid0 if that's
3230 not taken.
3231 */
3232
3233 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3234 if (raidPtrs[raidID]->valid == 0) {
3235 /* can use this one! */
3236 break;
3237 }
3238 }
3239 }
3240
3241 if (raidID < 0) {
3242 /* punt... */
3243 printf("Unable to auto configure this set!\n");
3244 printf("(Out of RAID devs!)\n");
3245 return(1);
3246 }
3247
3248 #if DEBUG
3249 printf("Configuring raid%d:\n",raidID);
3250 #endif
3251
3252 raidPtr = raidPtrs[raidID];
3253
3254 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3255 raidPtr->raidid = raidID;
3256 raidPtr->openings = RAIDOUTSTANDING;
3257
3258 /* 3. Build the configuration structure */
3259 rf_create_configuration(cset->ac, config, raidPtr);
3260
3261 /* 4. Do the configuration */
3262 retcode = rf_Configure(raidPtr, config, cset->ac);
3263
3264 if (retcode == 0) {
3265
3266 raidinit(raidPtrs[raidID]);
3267
3268 rf_markalldirty(raidPtrs[raidID]);
3269 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3270 if (cset->ac->clabel->root_partition==1) {
3271 /* everything configured just fine. Make a note
3272 that this set is eligible to be root. */
3273 cset->rootable = 1;
3274 /* XXX do this here? */
3275 raidPtrs[raidID]->root_partition = 1;
3276 }
3277 }
3278
3279 /* 5. Cleanup */
3280 free(config, M_RAIDFRAME);
3281
3282 *unit = raidID;
3283 return(retcode);
3284 }
3285
3286 void
3287 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3288 {
3289 struct buf *bp;
3290
3291 bp = (struct buf *)desc->bp;
3292 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3293 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3294 }
3295