rf_netbsdkintf.c revision 1.186.2.1 1 /* $NetBSD: rf_netbsdkintf.c,v 1.186.2.1 2005/06/17 13:34:50 tron Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.186.2.1 2005/06/17 13:34:50 tron Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171
172 #include <dev/raidframe/raidframevar.h>
173 #include <dev/raidframe/raidframeio.h>
174 #include "raid.h"
175 #include "opt_raid_autoconfig.h"
176 #include "rf_raid.h"
177 #include "rf_copyback.h"
178 #include "rf_dag.h"
179 #include "rf_dagflags.h"
180 #include "rf_desc.h"
181 #include "rf_diskqueue.h"
182 #include "rf_etimer.h"
183 #include "rf_general.h"
184 #include "rf_kintf.h"
185 #include "rf_options.h"
186 #include "rf_driver.h"
187 #include "rf_parityscan.h"
188 #include "rf_threadstuff.h"
189
190 #ifdef DEBUG
191 int rf_kdebug_level = 0;
192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
193 #else /* DEBUG */
194 #define db1_printf(a) { }
195 #endif /* DEBUG */
196
197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
198
199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
200
201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202 * spare table */
203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204 * installation process */
205
206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
207
208 /* prototypes */
209 static void KernelWakeupFunc(struct buf *);
210 static void InitBP(struct buf *, struct vnode *, unsigned,
211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212 void *, int, struct proc *);
213 static void raidinit(RF_Raid_t *);
214
215 void raidattach(int);
216
217 dev_type_open(raidopen);
218 dev_type_close(raidclose);
219 dev_type_read(raidread);
220 dev_type_write(raidwrite);
221 dev_type_ioctl(raidioctl);
222 dev_type_strategy(raidstrategy);
223 dev_type_dump(raiddump);
224 dev_type_size(raidsize);
225
226 const struct bdevsw raid_bdevsw = {
227 raidopen, raidclose, raidstrategy, raidioctl,
228 raiddump, raidsize, D_DISK
229 };
230
231 const struct cdevsw raid_cdevsw = {
232 raidopen, raidclose, raidread, raidwrite, raidioctl,
233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
234 };
235
/*
 * Pilfered from ccd.c
 */

/* Per-I/O bookkeeping: wraps the buf sent to an underlying component
   with a pointer back to the original request it belongs to. */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Software state for each configured raid unit. */
struct raid_softc {
	int sc_flags;		/* flags */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the unit number from a raid device's dev_t. */
#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;
267
268 /*
269 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
270 * Be aware that large numbers can allow the driver to consume a lot of
271 * kernel memory, especially on writes, and in degraded mode reads.
272 *
273 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
274 * a single 64K write will typically require 64K for the old data,
275 * 64K for the old parity, and 64K for the new parity, for a total
276 * of 192K (if the parity buffer is not re-used immediately).
277 * Even it if is used immediately, that's still 128K, which when multiplied
278 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
279 *
280 * Now in degraded mode, for example, a 64K read on the above setup may
281 * require data reconstruction, which will require *all* of the 4 remaining
282 * disks to participate -- 4 * 32K/disk == 128K again.
283 */
284
285 #ifndef RAIDOUTSTANDING
286 #define RAIDOUTSTANDING 6
287 #endif
288
289 #define RAIDLABELDEV(dev) \
290 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
291
292 /* declared here, and made public, for the benefit of KVM stuff.. */
293 struct raid_softc *raid_softc;
294
295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
296 struct disklabel *);
297 static void raidgetdisklabel(dev_t);
298 static void raidmakedisklabel(struct raid_softc *);
299
300 static int raidlock(struct raid_softc *);
301 static void raidunlock(struct raid_softc *);
302
303 static void rf_markalldirty(RF_Raid_t *);
304
305 struct device *raidrootdev;
306
307 void rf_ReconThread(struct rf_recon_req *);
308 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
309 void rf_CopybackThread(RF_Raid_t *raidPtr);
310 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
311 int rf_autoconfig(struct device *self);
312 void rf_buildroothack(RF_ConfigSet_t *);
313
314 RF_AutoConfig_t *rf_find_raid_components(void);
315 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
316 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
317 static int rf_reasonable_label(RF_ComponentLabel_t *);
318 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
319 int rf_set_autoconfig(RF_Raid_t *, int);
320 int rf_set_rootpartition(RF_Raid_t *, int);
321 void rf_release_all_vps(RF_ConfigSet_t *);
322 void rf_cleanup_config_set(RF_ConfigSet_t *);
323 int rf_have_enough_components(RF_ConfigSet_t *);
324 int rf_auto_config_set(RF_ConfigSet_t *, int *);
325
326 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
327 allow autoconfig to take place.
328 Note that this is overridden by having
329 RAID_AUTOCONFIG as an option in the
330 kernel config file. */
331
332 struct RF_Pools_s rf_pools;
333
/*
 * raidattach: pseudo-device attach routine.
 *
 * Called once at boot with the number of "raid" units compiled into
 * the kernel.  Allocates the global raidPtrs[] descriptor array, the
 * per-unit softc array and the fake root devices, boots the RAIDframe
 * core, and registers a config finalizer so that auto-configuration
 * of RAID sets runs after all real hardware devices have attached.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
	    "raidpl", num * RAIDOUTSTANDING,
	    2 * num * RAIDOUTSTANDING);

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
	    M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
	    M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		/* Fake up a struct device per unit so an auto-configured
		   set can later be nominated as the root device. */
		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
		    (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Trim numraid so the other entry points never
			   reference the units we failed to allocate. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
428
/*
 * rf_autoconfig: config finalizer for RAIDframe.
 *
 * Locates RAID components on all disks, groups them into configuration
 * sets, and hands the sets to rf_buildroothack() for configuration.
 * Returns 1 ("did something") the first time it runs; 0 on later calls
 * or when autoconfiguration is disabled.
 */
int
rf_autoconfig(struct device *self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (raidautoconfig == 0)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfig = 0;

	/* 1. locate all RAID components on the system */
#ifdef DEBUG
	printf("Searching for RAID components...\n");
#endif
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return (1);
}
458
/*
 * rf_buildroothack: walk the list of configuration sets, auto-configure
 * every complete set that has autoconfigure enabled, and release the
 * component vnodes of the rest.  If exactly one successfully configured
 * set is marked rootable it becomes booted_device; if more than one is,
 * force the user to choose at the root-device prompt.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int num_root;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		/* grab the link first: rf_cleanup_config_set() below
		   destroys the current element */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#if DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = &raidrootdev[rootID];
	} else if (num_root > 1) {
		/* we can't guess.. require the user to answer... */
		boothowto |= RB_ASKNAME;
	}
}
508
509
/*
 * raidsize: return the size, in DEV_BSIZE units, of the indicated
 * partition — used by the swap/dump code.  Returns -1 if the unit is
 * not configured, the partition is not of type FS_SWAP, or the device
 * cannot be opened/closed.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	/* If the partition was not already open, open it so the label
	   is valid, and close it again on the way out. */
	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);

}
544
/*
 * raiddump: kernel crash-dump entry point.  Dumping to a RAID set is
 * not supported; always fails with ENXIO.
 */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
/*
 * raidopen: open entry point (both block and character devices).
 *
 * Validates the unit and partition, records the open in the per-format
 * open masks, and — on the very first open of a configured set — marks
 * all components dirty so an unclean shutdown can be detected later.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against unconfigure and other opens/closes. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* (Re)read the disklabel on the first open of a configured set. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close entry point (both block and character devices).
 *
 * Clears this partition from the open masks.  On the last close of a
 * configured set the component labels are updated to "clean"; if the
 * system is shutting down the set itself is shut down and detached.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	/* NOTE(review): any rf_Shutdown() error above is discarded;
	   returning 0 here preserves the historical behaviour. */
	return (0);

}
678
679 void
680 raidstrategy(struct buf *bp)
681 {
682 int s;
683
684 unsigned int raidID = raidunit(bp->b_dev);
685 RF_Raid_t *raidPtr;
686 struct raid_softc *rs = &raid_softc[raidID];
687 int wlabel;
688
689 if ((rs->sc_flags & RAIDF_INITED) ==0) {
690 bp->b_error = ENXIO;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 if (raidID >= numraid || !raidPtrs[raidID]) {
697 bp->b_error = ENODEV;
698 bp->b_flags |= B_ERROR;
699 bp->b_resid = bp->b_bcount;
700 biodone(bp);
701 return;
702 }
703 raidPtr = raidPtrs[raidID];
704 if (!raidPtr->valid) {
705 bp->b_error = ENODEV;
706 bp->b_flags |= B_ERROR;
707 bp->b_resid = bp->b_bcount;
708 biodone(bp);
709 return;
710 }
711 if (bp->b_bcount == 0) {
712 db1_printf(("b_bcount is zero..\n"));
713 biodone(bp);
714 return;
715 }
716
717 /*
718 * Do bounds checking and adjust transfer. If there's an
719 * error, the bounds check will flag that for us.
720 */
721
722 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
723 if (DISKPART(bp->b_dev) != RAW_PART)
724 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
725 db1_printf(("Bounds check failed!!:%d %d\n",
726 (int) bp->b_blkno, (int) wlabel));
727 biodone(bp);
728 return;
729 }
730 s = splbio();
731
732 bp->b_resid = 0;
733
734 /* stuff it onto our queue */
735 BUFQ_PUT(&rs->buf_queue, bp);
736
737 raidstart(raidPtrs[raidID]);
738
739 splx(s);
740 }
741 /* ARGSUSED */
742 int
743 raidread(dev_t dev, struct uio *uio, int flags)
744 {
745 int unit = raidunit(dev);
746 struct raid_softc *rs;
747
748 if (unit >= numraid)
749 return (ENXIO);
750 rs = &raid_softc[unit];
751
752 if ((rs->sc_flags & RAIDF_INITED) == 0)
753 return (ENXIO);
754
755 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
756
757 }
758 /* ARGSUSED */
759 int
760 raidwrite(dev_t dev, struct uio *uio, int flags)
761 {
762 int unit = raidunit(dev);
763 struct raid_softc *rs;
764
765 if (unit >= numraid)
766 return (ENXIO);
767 rs = &raid_softc[unit];
768
769 if ((rs->sc_flags & RAIDF_INITED) == 0)
770 return (ENXIO);
771
772 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
773
774 }
775
776 int
777 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
778 {
779 int unit = raidunit(dev);
780 int error = 0;
781 int part, pmask;
782 struct raid_softc *rs;
783 RF_Config_t *k_cfg, *u_cfg;
784 RF_Raid_t *raidPtr;
785 RF_RaidDisk_t *diskPtr;
786 RF_AccTotals_t *totals;
787 RF_DeviceConfig_t *d_cfg, **ucfgp;
788 u_char *specific_buf;
789 int retcode = 0;
790 int column;
791 int raidid;
792 struct rf_recon_req *rrcopy, *rr;
793 RF_ComponentLabel_t *clabel;
794 RF_ComponentLabel_t ci_label;
795 RF_ComponentLabel_t **clabel_ptr;
796 RF_SingleComponent_t *sparePtr,*componentPtr;
797 RF_SingleComponent_t hot_spare;
798 RF_SingleComponent_t component;
799 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
800 int i, j, d;
801 #ifdef __HAVE_OLD_DISKLABEL
802 struct disklabel newlabel;
803 #endif
804
805 if (unit >= numraid)
806 return (ENXIO);
807 rs = &raid_softc[unit];
808 raidPtr = raidPtrs[unit];
809
810 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
811 (int) DISKPART(dev), (int) unit, (int) cmd));
812
813 /* Must be open for writes for these commands... */
814 switch (cmd) {
815 case DIOCSDINFO:
816 case DIOCWDINFO:
817 #ifdef __HAVE_OLD_DISKLABEL
818 case ODIOCWDINFO:
819 case ODIOCSDINFO:
820 #endif
821 case DIOCWLABEL:
822 if ((flag & FWRITE) == 0)
823 return (EBADF);
824 }
825
826 /* Must be initialized for these... */
827 switch (cmd) {
828 case DIOCGDINFO:
829 case DIOCSDINFO:
830 case DIOCWDINFO:
831 #ifdef __HAVE_OLD_DISKLABEL
832 case ODIOCGDINFO:
833 case ODIOCWDINFO:
834 case ODIOCSDINFO:
835 case ODIOCGDEFLABEL:
836 #endif
837 case DIOCGPART:
838 case DIOCWLABEL:
839 case DIOCGDEFLABEL:
840 case RAIDFRAME_SHUTDOWN:
841 case RAIDFRAME_REWRITEPARITY:
842 case RAIDFRAME_GET_INFO:
843 case RAIDFRAME_RESET_ACCTOTALS:
844 case RAIDFRAME_GET_ACCTOTALS:
845 case RAIDFRAME_KEEP_ACCTOTALS:
846 case RAIDFRAME_GET_SIZE:
847 case RAIDFRAME_FAIL_DISK:
848 case RAIDFRAME_COPYBACK:
849 case RAIDFRAME_CHECK_RECON_STATUS:
850 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
851 case RAIDFRAME_GET_COMPONENT_LABEL:
852 case RAIDFRAME_SET_COMPONENT_LABEL:
853 case RAIDFRAME_ADD_HOT_SPARE:
854 case RAIDFRAME_REMOVE_HOT_SPARE:
855 case RAIDFRAME_INIT_LABELS:
856 case RAIDFRAME_REBUILD_IN_PLACE:
857 case RAIDFRAME_CHECK_PARITY:
858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
860 case RAIDFRAME_CHECK_COPYBACK_STATUS:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
862 case RAIDFRAME_SET_AUTOCONFIG:
863 case RAIDFRAME_SET_ROOT:
864 case RAIDFRAME_DELETE_COMPONENT:
865 case RAIDFRAME_INCORPORATE_HOT_SPARE:
866 if ((rs->sc_flags & RAIDF_INITED) == 0)
867 return (ENXIO);
868 }
869
870 switch (cmd) {
871
872 /* configure the system */
873 case RAIDFRAME_CONFIGURE:
874
875 if (raidPtr->valid) {
876 /* There is a valid RAID set running on this unit! */
877 printf("raid%d: Device already configured!\n",unit);
878 return(EINVAL);
879 }
880
881 /* copy-in the configuration information */
882 /* data points to a pointer to the configuration structure */
883
884 u_cfg = *((RF_Config_t **) data);
885 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
886 if (k_cfg == NULL) {
887 return (ENOMEM);
888 }
889 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
890 if (retcode) {
891 RF_Free(k_cfg, sizeof(RF_Config_t));
892 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
893 retcode));
894 return (retcode);
895 }
896 /* allocate a buffer for the layout-specific data, and copy it
897 * in */
898 if (k_cfg->layoutSpecificSize) {
899 if (k_cfg->layoutSpecificSize > 10000) {
900 /* sanity check */
901 RF_Free(k_cfg, sizeof(RF_Config_t));
902 return (EINVAL);
903 }
904 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
905 (u_char *));
906 if (specific_buf == NULL) {
907 RF_Free(k_cfg, sizeof(RF_Config_t));
908 return (ENOMEM);
909 }
910 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
911 k_cfg->layoutSpecificSize);
912 if (retcode) {
913 RF_Free(k_cfg, sizeof(RF_Config_t));
914 RF_Free(specific_buf,
915 k_cfg->layoutSpecificSize);
916 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
917 retcode));
918 return (retcode);
919 }
920 } else
921 specific_buf = NULL;
922 k_cfg->layoutSpecific = specific_buf;
923
924 /* should do some kind of sanity check on the configuration.
925 * Store the sum of all the bytes in the last byte? */
926
927 /* configure the system */
928
929 /*
930 * Clear the entire RAID descriptor, just to make sure
931 * there is no stale data left in the case of a
932 * reconfiguration
933 */
934 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
935 raidPtr->raidid = unit;
936
937 retcode = rf_Configure(raidPtr, k_cfg, NULL);
938
939 if (retcode == 0) {
940
941 /* allow this many simultaneous IO's to
942 this RAID device */
943 raidPtr->openings = RAIDOUTSTANDING;
944
945 raidinit(raidPtr);
946 rf_markalldirty(raidPtr);
947 }
948 /* free the buffers. No return code here. */
949 if (k_cfg->layoutSpecificSize) {
950 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
951 }
952 RF_Free(k_cfg, sizeof(RF_Config_t));
953
954 return (retcode);
955
956 /* shutdown the system */
957 case RAIDFRAME_SHUTDOWN:
958
959 if ((error = raidlock(rs)) != 0)
960 return (error);
961
962 /*
963 * If somebody has a partition mounted, we shouldn't
964 * shutdown.
965 */
966
967 part = DISKPART(dev);
968 pmask = (1 << part);
969 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
970 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
971 (rs->sc_dkdev.dk_copenmask & pmask))) {
972 raidunlock(rs);
973 return (EBUSY);
974 }
975
976 retcode = rf_Shutdown(raidPtr);
977
978 /* It's no longer initialized... */
979 rs->sc_flags &= ~RAIDF_INITED;
980
981 /* Detach the disk. */
982 disk_detach(&rs->sc_dkdev);
983
984 raidunlock(rs);
985
986 return (retcode);
987 case RAIDFRAME_GET_COMPONENT_LABEL:
988 clabel_ptr = (RF_ComponentLabel_t **) data;
989 /* need to read the component label for the disk indicated
990 by row,column in clabel */
991
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
994 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
995 (RF_ComponentLabel_t *));
996 if (clabel == NULL)
997 return (ENOMEM);
998
999 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1000
1001 retcode = copyin( *clabel_ptr, clabel,
1002 sizeof(RF_ComponentLabel_t));
1003
1004 if (retcode) {
1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1006 return(retcode);
1007 }
1008
1009 clabel->row = 0; /* Don't allow looking at anything else.*/
1010
1011 column = clabel->column;
1012
1013 if ((column < 0) || (column >= raidPtr->numCol +
1014 raidPtr->numSpare)) {
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return(EINVAL);
1017 }
1018
1019 raidread_component_label(raidPtr->Disks[column].dev,
1020 raidPtr->raid_cinfo[column].ci_vp,
1021 clabel );
1022
1023 retcode = copyout(clabel, *clabel_ptr,
1024 sizeof(RF_ComponentLabel_t));
1025 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1026 return (retcode);
1027
1028 case RAIDFRAME_SET_COMPONENT_LABEL:
1029 clabel = (RF_ComponentLabel_t *) data;
1030
1031 /* XXX check the label for valid stuff... */
1032 /* Note that some things *should not* get modified --
1033 the user should be re-initing the labels instead of
1034 trying to patch things.
1035 */
1036
1037 raidid = raidPtr->raidid;
1038 #if DEBUG
1039 printf("raid%d: Got component label:\n", raidid);
1040 printf("raid%d: Version: %d\n", raidid, clabel->version);
1041 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1042 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1043 printf("raid%d: Column: %d\n", raidid, clabel->column);
1044 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1045 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1046 printf("raid%d: Status: %d\n", raidid, clabel->status);
1047 #endif
1048 clabel->row = 0;
1049 column = clabel->column;
1050
1051 if ((column < 0) || (column >= raidPtr->numCol)) {
1052 return(EINVAL);
1053 }
1054
1055 /* XXX this isn't allowed to do anything for now :-) */
1056
1057 /* XXX and before it is, we need to fill in the rest
1058 of the fields!?!?!?! */
1059 #if 0
1060 raidwrite_component_label(
1061 raidPtr->Disks[column].dev,
1062 raidPtr->raid_cinfo[column].ci_vp,
1063 clabel );
1064 #endif
1065 return (0);
1066
1067 case RAIDFRAME_INIT_LABELS:
1068 clabel = (RF_ComponentLabel_t *) data;
1069 /*
1070 we only want the serial number from
1071 the above. We get all the rest of the information
1072 from the config that was used to create this RAID
1073 set.
1074 */
1075
1076 raidPtr->serial_number = clabel->serial_number;
1077
1078 raid_init_component_label(raidPtr, &ci_label);
1079 ci_label.serial_number = clabel->serial_number;
1080 ci_label.row = 0; /* we dont' pretend to support more */
1081
1082 for(column=0;column<raidPtr->numCol;column++) {
1083 diskPtr = &raidPtr->Disks[column];
1084 if (!RF_DEAD_DISK(diskPtr->status)) {
1085 ci_label.partitionSize = diskPtr->partitionSize;
1086 ci_label.column = column;
1087 raidwrite_component_label(
1088 raidPtr->Disks[column].dev,
1089 raidPtr->raid_cinfo[column].ci_vp,
1090 &ci_label );
1091 }
1092 }
1093
1094 return (retcode);
1095 case RAIDFRAME_SET_AUTOCONFIG:
1096 d = rf_set_autoconfig(raidPtr, *(int *) data);
1097 printf("raid%d: New autoconfig value is: %d\n",
1098 raidPtr->raidid, d);
1099 *(int *) data = d;
1100 return (retcode);
1101
1102 case RAIDFRAME_SET_ROOT:
1103 d = rf_set_rootpartition(raidPtr, *(int *) data);
1104 printf("raid%d: New rootpartition value is: %d\n",
1105 raidPtr->raidid, d);
1106 *(int *) data = d;
1107 return (retcode);
1108
1109 /* initialize all parity */
1110 case RAIDFRAME_REWRITEPARITY:
1111
1112 if (raidPtr->Layout.map->faultsTolerated == 0) {
1113 /* Parity for RAID 0 is trivially correct */
1114 raidPtr->parity_good = RF_RAID_CLEAN;
1115 return(0);
1116 }
1117
1118 if (raidPtr->parity_rewrite_in_progress == 1) {
1119 /* Re-write is already in progress! */
1120 return(EINVAL);
1121 }
1122
1123 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1124 rf_RewriteParityThread,
1125 raidPtr,"raid_parity");
1126 return (retcode);
1127
1128
1129 case RAIDFRAME_ADD_HOT_SPARE:
1130 sparePtr = (RF_SingleComponent_t *) data;
1131 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1132 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1133 return(retcode);
1134
1135 case RAIDFRAME_REMOVE_HOT_SPARE:
1136 return(retcode);
1137
1138 case RAIDFRAME_DELETE_COMPONENT:
1139 componentPtr = (RF_SingleComponent_t *)data;
1140 memcpy( &component, componentPtr,
1141 sizeof(RF_SingleComponent_t));
1142 retcode = rf_delete_component(raidPtr, &component);
1143 return(retcode);
1144
1145 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1146 componentPtr = (RF_SingleComponent_t *)data;
1147 memcpy( &component, componentPtr,
1148 sizeof(RF_SingleComponent_t));
1149 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1150 return(retcode);
1151
1152 case RAIDFRAME_REBUILD_IN_PLACE:
1153
1154 if (raidPtr->Layout.map->faultsTolerated == 0) {
1155 /* Can't do this on a RAID 0!! */
1156 return(EINVAL);
1157 }
1158
1159 if (raidPtr->recon_in_progress == 1) {
1160 /* a reconstruct is already in progress! */
1161 return(EINVAL);
1162 }
1163
1164 componentPtr = (RF_SingleComponent_t *) data;
1165 memcpy( &component, componentPtr,
1166 sizeof(RF_SingleComponent_t));
1167 component.row = 0; /* we don't support any more */
1168 column = component.column;
1169
1170 if ((column < 0) || (column >= raidPtr->numCol)) {
1171 return(EINVAL);
1172 }
1173
1174 RF_LOCK_MUTEX(raidPtr->mutex);
1175 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1176 (raidPtr->numFailures > 0)) {
1177 /* XXX 0 above shouldn't be constant!!! */
1178 /* some component other than this has failed.
1179 Let's not make things worse than they already
1180 are... */
1181 printf("raid%d: Unable to reconstruct to disk at:\n",
1182 raidPtr->raidid);
1183 printf("raid%d: Col: %d Too many failures.\n",
1184 raidPtr->raidid, column);
1185 RF_UNLOCK_MUTEX(raidPtr->mutex);
1186 return (EINVAL);
1187 }
1188 if (raidPtr->Disks[column].status ==
1189 rf_ds_reconstructing) {
1190 printf("raid%d: Unable to reconstruct to disk at:\n",
1191 raidPtr->raidid);
1192 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1193
1194 RF_UNLOCK_MUTEX(raidPtr->mutex);
1195 return (EINVAL);
1196 }
1197 if (raidPtr->Disks[column].status == rf_ds_spared) {
1198 RF_UNLOCK_MUTEX(raidPtr->mutex);
1199 return (EINVAL);
1200 }
1201 RF_UNLOCK_MUTEX(raidPtr->mutex);
1202
1203 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1204 if (rrcopy == NULL)
1205 return(ENOMEM);
1206
1207 rrcopy->raidPtr = (void *) raidPtr;
1208 rrcopy->col = column;
1209
1210 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1211 rf_ReconstructInPlaceThread,
1212 rrcopy,"raid_reconip");
1213 return(retcode);
1214
1215 case RAIDFRAME_GET_INFO:
1216 if (!raidPtr->valid)
1217 return (ENODEV);
1218 ucfgp = (RF_DeviceConfig_t **) data;
1219 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1220 (RF_DeviceConfig_t *));
1221 if (d_cfg == NULL)
1222 return (ENOMEM);
1223 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1224 d_cfg->rows = 1; /* there is only 1 row now */
1225 d_cfg->cols = raidPtr->numCol;
1226 d_cfg->ndevs = raidPtr->numCol;
1227 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229 return (ENOMEM);
1230 }
1231 d_cfg->nspares = raidPtr->numSpare;
1232 if (d_cfg->nspares >= RF_MAX_DISKS) {
1233 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1234 return (ENOMEM);
1235 }
1236 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1237 d = 0;
1238 for (j = 0; j < d_cfg->cols; j++) {
1239 d_cfg->devs[d] = raidPtr->Disks[j];
1240 d++;
1241 }
1242 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1243 d_cfg->spares[i] = raidPtr->Disks[j];
1244 }
1245 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1246 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1247
1248 return (retcode);
1249
1250 case RAIDFRAME_CHECK_PARITY:
1251 *(int *) data = raidPtr->parity_good;
1252 return (0);
1253
1254 case RAIDFRAME_RESET_ACCTOTALS:
1255 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1256 return (0);
1257
1258 case RAIDFRAME_GET_ACCTOTALS:
1259 totals = (RF_AccTotals_t *) data;
1260 *totals = raidPtr->acc_totals;
1261 return (0);
1262
1263 case RAIDFRAME_KEEP_ACCTOTALS:
1264 raidPtr->keep_acc_totals = *(int *)data;
1265 return (0);
1266
1267 case RAIDFRAME_GET_SIZE:
1268 *(int *) data = raidPtr->totalSectors;
1269 return (0);
1270
1271 /* fail a disk & optionally start reconstruction */
1272 case RAIDFRAME_FAIL_DISK:
1273
1274 if (raidPtr->Layout.map->faultsTolerated == 0) {
1275 /* Can't do this on a RAID 0!! */
1276 return(EINVAL);
1277 }
1278
1279 rr = (struct rf_recon_req *) data;
1280 rr->row = 0;
1281 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1282 return (EINVAL);
1283
1284
1285 RF_LOCK_MUTEX(raidPtr->mutex);
1286 if (raidPtr->status == rf_rs_reconstructing) {
1287 /* you can't fail a disk while we're reconstructing! */
1288 /* XXX wrong for RAID6 */
1289 RF_UNLOCK_MUTEX(raidPtr->mutex);
1290 return (EINVAL);
1291 }
1292 if ((raidPtr->Disks[rr->col].status ==
1293 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1294 /* some other component has failed. Let's not make
1295 things worse. XXX wrong for RAID6 */
1296 RF_UNLOCK_MUTEX(raidPtr->mutex);
1297 return (EINVAL);
1298 }
1299 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1300 /* Can't fail a spared disk! */
1301 RF_UNLOCK_MUTEX(raidPtr->mutex);
1302 return (EINVAL);
1303 }
1304 RF_UNLOCK_MUTEX(raidPtr->mutex);
1305
1306 /* make a copy of the recon request so that we don't rely on
1307 * the user's buffer */
1308 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1309 if (rrcopy == NULL)
1310 return(ENOMEM);
1311 memcpy(rrcopy, rr, sizeof(*rr));
1312 rrcopy->raidPtr = (void *) raidPtr;
1313
1314 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1315 rf_ReconThread,
1316 rrcopy,"raid_recon");
1317 return (0);
1318
1319 /* invoke a copyback operation after recon on whatever disk
1320 * needs it, if any */
1321 case RAIDFRAME_COPYBACK:
1322
1323 if (raidPtr->Layout.map->faultsTolerated == 0) {
1324 /* This makes no sense on a RAID 0!! */
1325 return(EINVAL);
1326 }
1327
1328 if (raidPtr->copyback_in_progress == 1) {
1329 /* Copyback is already in progress! */
1330 return(EINVAL);
1331 }
1332
1333 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1334 rf_CopybackThread,
1335 raidPtr,"raid_copyback");
1336 return (retcode);
1337
1338 /* return the percentage completion of reconstruction */
1339 case RAIDFRAME_CHECK_RECON_STATUS:
1340 if (raidPtr->Layout.map->faultsTolerated == 0) {
1341 /* This makes no sense on a RAID 0, so tell the
1342 user it's done. */
1343 *(int *) data = 100;
1344 return(0);
1345 }
1346 if (raidPtr->status != rf_rs_reconstructing)
1347 *(int *) data = 100;
1348 else {
1349 if (raidPtr->reconControl->numRUsTotal > 0) {
1350 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1351 } else {
1352 *(int *) data = 0;
1353 }
1354 }
1355 return (0);
1356 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1357 progressInfoPtr = (RF_ProgressInfo_t **) data;
1358 if (raidPtr->status != rf_rs_reconstructing) {
1359 progressInfo.remaining = 0;
1360 progressInfo.completed = 100;
1361 progressInfo.total = 100;
1362 } else {
1363 progressInfo.total =
1364 raidPtr->reconControl->numRUsTotal;
1365 progressInfo.completed =
1366 raidPtr->reconControl->numRUsComplete;
1367 progressInfo.remaining = progressInfo.total -
1368 progressInfo.completed;
1369 }
1370 retcode = copyout(&progressInfo, *progressInfoPtr,
1371 sizeof(RF_ProgressInfo_t));
1372 return (retcode);
1373
1374 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1375 if (raidPtr->Layout.map->faultsTolerated == 0) {
1376 /* This makes no sense on a RAID 0, so tell the
1377 user it's done. */
1378 *(int *) data = 100;
1379 return(0);
1380 }
1381 if (raidPtr->parity_rewrite_in_progress == 1) {
1382 *(int *) data = 100 *
1383 raidPtr->parity_rewrite_stripes_done /
1384 raidPtr->Layout.numStripe;
1385 } else {
1386 *(int *) data = 100;
1387 }
1388 return (0);
1389
1390 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1391 progressInfoPtr = (RF_ProgressInfo_t **) data;
1392 if (raidPtr->parity_rewrite_in_progress == 1) {
1393 progressInfo.total = raidPtr->Layout.numStripe;
1394 progressInfo.completed =
1395 raidPtr->parity_rewrite_stripes_done;
1396 progressInfo.remaining = progressInfo.total -
1397 progressInfo.completed;
1398 } else {
1399 progressInfo.remaining = 0;
1400 progressInfo.completed = 100;
1401 progressInfo.total = 100;
1402 }
1403 retcode = copyout(&progressInfo, *progressInfoPtr,
1404 sizeof(RF_ProgressInfo_t));
1405 return (retcode);
1406
1407 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1408 if (raidPtr->Layout.map->faultsTolerated == 0) {
1409 /* This makes no sense on a RAID 0 */
1410 *(int *) data = 100;
1411 return(0);
1412 }
1413 if (raidPtr->copyback_in_progress == 1) {
1414 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1415 raidPtr->Layout.numStripe;
1416 } else {
1417 *(int *) data = 100;
1418 }
1419 return (0);
1420
1421 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1422 progressInfoPtr = (RF_ProgressInfo_t **) data;
1423 if (raidPtr->copyback_in_progress == 1) {
1424 progressInfo.total = raidPtr->Layout.numStripe;
1425 progressInfo.completed =
1426 raidPtr->copyback_stripes_done;
1427 progressInfo.remaining = progressInfo.total -
1428 progressInfo.completed;
1429 } else {
1430 progressInfo.remaining = 0;
1431 progressInfo.completed = 100;
1432 progressInfo.total = 100;
1433 }
1434 retcode = copyout(&progressInfo, *progressInfoPtr,
1435 sizeof(RF_ProgressInfo_t));
1436 return (retcode);
1437
1438 /* the sparetable daemon calls this to wait for the kernel to
1439 * need a spare table. this ioctl does not return until a
1440 * spare table is needed. XXX -- calling mpsleep here in the
1441 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1442 * -- I should either compute the spare table in the kernel,
1443 * or have a different -- XXX XXX -- interface (a different
1444 * character device) for delivering the table -- XXX */
1445 #if 0
1446 case RAIDFRAME_SPARET_WAIT:
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 while (!rf_sparet_wait_queue)
1449 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1450 waitreq = rf_sparet_wait_queue;
1451 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1452 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1453
1454 /* structure assignment */
1455 *((RF_SparetWait_t *) data) = *waitreq;
1456
1457 RF_Free(waitreq, sizeof(*waitreq));
1458 return (0);
1459
1460 /* wakes up a process waiting on SPARET_WAIT and puts an error
1461 * code in it that will cause the dameon to exit */
1462 case RAIDFRAME_ABORT_SPARET_WAIT:
1463 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1464 waitreq->fcol = -1;
1465 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1466 waitreq->next = rf_sparet_wait_queue;
1467 rf_sparet_wait_queue = waitreq;
1468 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1469 wakeup(&rf_sparet_wait_queue);
1470 return (0);
1471
1472 /* used by the spare table daemon to deliver a spare table
1473 * into the kernel */
1474 case RAIDFRAME_SEND_SPARET:
1475
1476 /* install the spare table */
1477 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1478
1479 /* respond to the requestor. the return status of the spare
1480 * table installation is passed in the "fcol" field */
1481 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1482 waitreq->fcol = retcode;
1483 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1484 waitreq->next = rf_sparet_resp_queue;
1485 rf_sparet_resp_queue = waitreq;
1486 wakeup(&rf_sparet_resp_queue);
1487 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1488
1489 return (retcode);
1490 #endif
1491
1492 default:
1493 break; /* fall through to the os-specific code below */
1494
1495 }
1496
1497 if (!raidPtr->valid)
1498 return (EINVAL);
1499
1500 /*
1501 * Add support for "regular" device ioctls here.
1502 */
1503
1504 switch (cmd) {
1505 case DIOCGDINFO:
1506 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1507 break;
1508 #ifdef __HAVE_OLD_DISKLABEL
1509 case ODIOCGDINFO:
1510 newlabel = *(rs->sc_dkdev.dk_label);
1511 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1512 return ENOTTY;
1513 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1514 break;
1515 #endif
1516
1517 case DIOCGPART:
1518 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1519 ((struct partinfo *) data)->part =
1520 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1521 break;
1522
1523 case DIOCWDINFO:
1524 case DIOCSDINFO:
1525 #ifdef __HAVE_OLD_DISKLABEL
1526 case ODIOCWDINFO:
1527 case ODIOCSDINFO:
1528 #endif
1529 {
1530 struct disklabel *lp;
1531 #ifdef __HAVE_OLD_DISKLABEL
1532 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1533 memset(&newlabel, 0, sizeof newlabel);
1534 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1535 lp = &newlabel;
1536 } else
1537 #endif
1538 lp = (struct disklabel *)data;
1539
1540 if ((error = raidlock(rs)) != 0)
1541 return (error);
1542
1543 rs->sc_flags |= RAIDF_LABELLING;
1544
1545 error = setdisklabel(rs->sc_dkdev.dk_label,
1546 lp, 0, rs->sc_dkdev.dk_cpulabel);
1547 if (error == 0) {
1548 if (cmd == DIOCWDINFO
1549 #ifdef __HAVE_OLD_DISKLABEL
1550 || cmd == ODIOCWDINFO
1551 #endif
1552 )
1553 error = writedisklabel(RAIDLABELDEV(dev),
1554 raidstrategy, rs->sc_dkdev.dk_label,
1555 rs->sc_dkdev.dk_cpulabel);
1556 }
1557 rs->sc_flags &= ~RAIDF_LABELLING;
1558
1559 raidunlock(rs);
1560
1561 if (error)
1562 return (error);
1563 break;
1564 }
1565
1566 case DIOCWLABEL:
1567 if (*(int *) data != 0)
1568 rs->sc_flags |= RAIDF_WLABEL;
1569 else
1570 rs->sc_flags &= ~RAIDF_WLABEL;
1571 break;
1572
1573 case DIOCGDEFLABEL:
1574 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1575 break;
1576
1577 #ifdef __HAVE_OLD_DISKLABEL
1578 case ODIOCGDEFLABEL:
1579 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1580 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1581 return ENOTTY;
1582 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1583 break;
1584 #endif
1585
1586 default:
1587 retcode = ENOTTY;
1588 }
1589 return (retcode);
1590
1591 }
1592
1593
1594 /* raidinit -- complete the rest of the initialization for the
1595 RAIDframe device. */
1596
1597
1598 static void
1599 raidinit(RF_Raid_t *raidPtr)
1600 {
1601 struct raid_softc *rs;
1602 int unit;
1603
1604 unit = raidPtr->raidid;
1605
1606 rs = &raid_softc[unit];
1607
1608 /* XXX should check return code first... */
1609 rs->sc_flags |= RAIDF_INITED;
1610
1611 /* XXX doesn't check bounds. */
1612 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1613
1614 rs->sc_dkdev.dk_name = rs->sc_xname;
1615
1616 /* disk_attach actually creates space for the CPU disklabel, among
1617 * other things, so it's critical to call this *BEFORE* we try putzing
1618 * with disklabels. */
1619
1620 disk_attach(&rs->sc_dkdev);
1621
1622 /* XXX There may be a weird interaction here between this, and
1623 * protectedSectors, as used in RAIDframe. */
1624
1625 rs->sc_size = raidPtr->totalSectors;
1626 }
1627 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1628 /* wake up the daemon & tell it to get us a spare table
1629 * XXX
1630 * the entries in the queues should be tagged with the raidPtr
1631 * so that in the extremely rare case that two recons happen at once,
1632 * we know for which device were requesting a spare table
1633 * XXX
1634 *
1635 * XXX This code is not currently used. GO
1636 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Post the request on the wait queue and wake the daemon
	 * blocked in the RAIDFRAME_SPARET_WAIT ioctl. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above looks stale -- tsleep() below
	 * does not drop rf_sparet_wait_mutex the way mpsleep() did, so
	 * the mutex appears to be held across the sleep.  Confirm before
	 * relying on this path (the file notes this code is unused). */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The spare-table installation status is passed back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1661 #endif
1662
1663 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1664 * bp & passes it down.
1665 * any calls originating in the kernel must use non-blocking I/O
1666 * do some extra sanity checking to return "appropriate" error values for
1667 * certain conditions (to make some standard utilities work)
1668 *
1669 * Formerly known as: rf_DoAccessKernel
1670 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* The mutex is dropped around the label update --
		 * NOTE(review): presumably because it can sleep or
		 * locks internally; confirm. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Invariant: raidPtr->mutex is held every time the loop
	 * condition is evaluated, and is dropped while a request is
	 * being validated and dispatched. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			/* Note: returns with the mutex released. */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* Convert the byte count to whole sectors; pb is 1 if a
		 * partial trailing sector would be touched. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* XXX the "1 ||" forces this debug branch on
		 * unconditionally. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Fail requests that run past the end of the set or whose
		 * sector arithmetic wrapped around (the "sum <" tests). */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one opening for this request. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		/* rf_DoAccess could not start the I/O: fail the buffer now.
		 * NOTE(review): openings was already decremented above and
		 * is not restored on this path -- confirm the completion/
		 * cleanup path accounts for it. */
		if (rc) {
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1794
1795
1796
1797
1798 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1799
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	/* Remember which queue this request came from so the completion
	 * handler (KernelWakeupFunc) can find it again. */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* Clear any stale error state left in the buffer. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Wrap the caller's buf in a raidbuf of our own; allocation may
	 * not sleep here, so failure is reported via the buffer. */
	raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
	if (raidbp == NULL) {
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	BIO_COPYPRIO(&raidbp->rf_buf, bp);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the double parentheses suggest this was
		 * meant to be db1_printf(); as written it is a plain
		 * printf of the string.  Harmless but probably
		 * unintended. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* Complete the NOP immediately through the normal
		 * completion path. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the shadow buf for the actual component I/O;
		 * KernelWakeupFunc fires when it completes. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Account for the pending write on the component vnode
		 * before handing the buf to the underlying driver. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1905 /* this is the callback function associated with a I/O invoked from
1906 kernel code.
1907 */
static void
KernelWakeupFunc(struct buf *vbp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	int s;

	/* Block disk interrupts while we unwind the completion. */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* vbp is really the raidbuf we handed to VOP_STRATEGY in
	 * rf_DispatchKernelIO; recover the request and the original
	 * buf from it. */
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate any I/O error from the shadow buf to the original,
	 * defaulting to EIO if the driver set B_ERROR without an errno. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
			raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif
#if RF_ACC_TRACE > 0
	/* Charge the elapsed device time to the access trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if ((queue->raidPtr->Disks[queue->col].status ==
		     rf_ds_optimal) && (queue->raidPtr->numFailures <
					queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is noticed by raidstart(), which
			 * triggers a component-label update. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Done with the shadow buf. */
	pool_put(&rf_pools.cbuf, raidbp);

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
1991
1992
1993
1994 /*
1995 * initialize a buf structure for doing an I/O in the kernel.
1996 */
1997 static void
1998 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1999 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
2000 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2001 struct proc *b_proc)
2002 {
2003 /* bp->b_flags = B_PHYS | rw_flag; */
2004 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2005 bp->b_bcount = numSect << logBytesPerSector;
2006 bp->b_bufsize = bp->b_bcount;
2007 bp->b_error = 0;
2008 bp->b_dev = dev;
2009 bp->b_data = bf;
2010 bp->b_blkno = startSect;
2011 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2012 if (bp->b_bcount == 0) {
2013 panic("bp->b_bcount is zero in InitBP!!");
2014 }
2015 bp->b_proc = b_proc;
2016 bp->b_iodone = cbFunc;
2017 bp->b_vp = b_vp;
2018
2019 }
2020
2021 static void
2022 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2023 struct disklabel *lp)
2024 {
2025 memset(lp, 0, sizeof(*lp));
2026
2027 /* fabricate a label... */
2028 lp->d_secperunit = raidPtr->totalSectors;
2029 lp->d_secsize = raidPtr->bytesPerSector;
2030 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2031 lp->d_ntracks = 4 * raidPtr->numCol;
2032 lp->d_ncylinders = raidPtr->totalSectors /
2033 (lp->d_nsectors * lp->d_ntracks);
2034 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2035
2036 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2037 lp->d_type = DTYPE_RAID;
2038 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2039 lp->d_rpm = 3600;
2040 lp->d_interleave = 1;
2041 lp->d_flags = 0;
2042
2043 lp->d_partitions[RAW_PART].p_offset = 0;
2044 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2045 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2046 lp->d_npartitions = RAW_PART + 1;
2047
2048 lp->d_magic = DISKMAGIC;
2049 lp->d_magic2 = DISKMAGIC;
2050 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2051
2052 }
2053 /*
2054 * Read the disklabel from the raid device. If one is not present, fake one
2055 * up.
2056 */
/*
 * Fetch the in-core disklabel for this RAID unit: first build a
 * default label from the RAID geometry, then try to read a real one
 * from the device.  If none is present, fake one up; if one is found,
 * sanity-check it against the actual size of the RAID set.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a label synthesized from the RAID geometry */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		/* warn (but still accept) if the label disagrees on size */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* ... and if any partition runs off the end of the set */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2109 /*
2110 * Take care of things one might want to take care of in the event
2111 * that a disklabel isn't present.
2112 */
2113 static void
2114 raidmakedisklabel(struct raid_softc *rs)
2115 {
2116 struct disklabel *lp = rs->sc_dkdev.dk_label;
2117 db1_printf(("Making a label..\n"));
2118
2119 /*
2120 * For historical reasons, if there's no disklabel present
2121 * the raw partition must be marked FS_BSDFFS.
2122 */
2123
2124 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2125
2126 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2127
2128 lp->d_checksum = dkcksum(lp);
2129 }
2130 /*
2131 * Lookup the provided name in the filesystem. If the file exists,
2132 * is a valid block device, and isn't being used by anyone else,
2133 * set *vpp to the file's vnode.
2134 * You'll find the original of this in ccd.c
2135 */
2136 int
2137 raidlookup(char *path, struct proc *p, struct vnode **vpp)
2138 {
2139 struct nameidata nd;
2140 struct vnode *vp;
2141 struct vattr va;
2142 int error;
2143
2144 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2145 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2146 return (error);
2147 }
2148 vp = nd.ni_vp;
2149 if (vp->v_usecount > 1) {
2150 VOP_UNLOCK(vp, 0);
2151 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2152 return (EBUSY);
2153 }
2154 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2155 VOP_UNLOCK(vp, 0);
2156 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2157 return (error);
2158 }
2159 /* XXX: eventually we should handle VREG, too. */
2160 if (va.va_type != VBLK) {
2161 VOP_UNLOCK(vp, 0);
2162 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2163 return (ENOTBLK);
2164 }
2165 VOP_UNLOCK(vp, 0);
2166 *vpp = vp;
2167 return (0);
2168 }
2169 /*
2170 * Wait interruptibly for an exclusive lock.
2171 *
2172 * XXX
2173 * Several drivers do this; it should be abstracted and made MP-safe.
2174 * (Hmm... where have we seen this warning before :-> GO )
2175 */
2176 static int
2177 raidlock(struct raid_softc *rs)
2178 {
2179 int error;
2180
2181 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2182 rs->sc_flags |= RAIDF_WANTED;
2183 if ((error =
2184 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2185 return (error);
2186 }
2187 rs->sc_flags |= RAIDF_LOCKED;
2188 return (0);
2189 }
2190 /*
2191 * Unlock and wake up any waiters.
2192 */
2193 static void
2194 raidunlock(struct raid_softc *rs)
2195 {
2196
2197 rs->sc_flags &= ~RAIDF_LOCKED;
2198 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2199 rs->sc_flags &= ~RAIDF_WANTED;
2200 wakeup(rs);
2201 }
2202 }
2203
2204
2205 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2206 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2207
2208 int
2209 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2210 {
2211 RF_ComponentLabel_t clabel;
2212 raidread_component_label(dev, b_vp, &clabel);
2213 clabel.mod_counter = mod_counter;
2214 clabel.clean = RF_RAID_CLEAN;
2215 raidwrite_component_label(dev, b_vp, &clabel);
2216 return(0);
2217 }
2218
2219
2220 int
2221 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2222 {
2223 RF_ComponentLabel_t clabel;
2224 raidread_component_label(dev, b_vp, &clabel);
2225 clabel.mod_counter = mod_counter;
2226 clabel.clean = RF_RAID_DIRTY;
2227 raidwrite_component_label(dev, b_vp, &clabel);
2228 return(0);
2229 }
2230
2231 /* ARGSUSED */
2232 int
2233 raidread_component_label(dev_t dev, struct vnode *b_vp,
2234 RF_ComponentLabel_t *clabel)
2235 {
2236 struct buf *bp;
2237 const struct bdevsw *bdev;
2238 int error;
2239
2240 /* XXX should probably ensure that we don't try to do this if
2241 someone has changed rf_protected_sectors. */
2242
2243 if (b_vp == NULL) {
2244 /* For whatever reason, this component is not valid.
2245 Don't try to read a component label from it. */
2246 return(EINVAL);
2247 }
2248
2249 /* get a block of the appropriate size... */
2250 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2251 bp->b_dev = dev;
2252
2253 /* get our ducks in a row for the read */
2254 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2255 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2256 bp->b_flags |= B_READ;
2257 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2258
2259 bdev = bdevsw_lookup(bp->b_dev);
2260 if (bdev == NULL)
2261 return (ENXIO);
2262 (*bdev->d_strategy)(bp);
2263
2264 error = biowait(bp);
2265
2266 if (!error) {
2267 memcpy(clabel, bp->b_data,
2268 sizeof(RF_ComponentLabel_t));
2269 }
2270
2271 brelse(bp);
2272 return(error);
2273 }
2274 /* ARGSUSED */
2275 int
2276 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2277 RF_ComponentLabel_t *clabel)
2278 {
2279 struct buf *bp;
2280 const struct bdevsw *bdev;
2281 int error;
2282
2283 /* get a block of the appropriate size... */
2284 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2285 bp->b_dev = dev;
2286
2287 /* get our ducks in a row for the write */
2288 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2289 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2290 bp->b_flags |= B_WRITE;
2291 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2292
2293 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2294
2295 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2296
2297 bdev = bdevsw_lookup(bp->b_dev);
2298 if (bdev == NULL)
2299 return (ENXIO);
2300 (*bdev->d_strategy)(bp);
2301 error = biowait(bp);
2302 brelse(bp);
2303 if (error) {
2304 #if 1
2305 printf("Failed to write RAID component info!\n");
2306 #endif
2307 }
2308
2309 return(error);
2310 }
2311
/*
 * Bump the set's modification counter and mark every live component
 * (and every in-use spare) dirty on disk.  Failed disks are left
 * strictly alone.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				/* live component: stamp it dirty with
				   the new mod_counter */
				raidmarkdirty(
					raidPtr->Disks[c].dev,
					raidPtr->raid_cinfo[c].ci_vp,
					raidPtr->mod_counter);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		/* spares live after the data columns in Disks[] */
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* rebuild the label from current RAID state, then
			   record the column the spare is covering */
			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2382
2383
/*
 * Refresh the component labels of all optimal components and in-use
 * spares with the current set state and a bumped mod_counter.  When
 * `final' is RF_FINAL_COMPONENT_UPDATE and parity is known good, the
 * labels are additionally marked clean (i.e. at shutdown).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* on final update, also set the clean bit if
			   parity is known to be good */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[c].dev,
						raidPtr->raid_cinfo[c].ci_vp,
						raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		/* spares live after the data columns in Disks[] */
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which data column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* rebuild the label from current RAID state, then
			   record column and mark the spare optimal, since
			   reconstruction onto it has completed */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2474
2475 void
2476 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2477 {
2478 struct proc *p;
2479
2480 p = raidPtr->engine_thread;
2481
2482 if (vp != NULL) {
2483 if (auto_configured == 1) {
2484 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2485 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2486 vput(vp);
2487
2488 } else {
2489 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2490 }
2491 }
2492 }
2493
2494
2495 void
2496 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2497 {
2498 int r,c;
2499 struct vnode *vp;
2500 int acd;
2501
2502
2503 /* We take this opportunity to close the vnodes like we should.. */
2504
2505 for (c = 0; c < raidPtr->numCol; c++) {
2506 vp = raidPtr->raid_cinfo[c].ci_vp;
2507 acd = raidPtr->Disks[c].auto_configured;
2508 rf_close_component(raidPtr, vp, acd);
2509 raidPtr->raid_cinfo[c].ci_vp = NULL;
2510 raidPtr->Disks[c].auto_configured = 0;
2511 }
2512
2513 for (r = 0; r < raidPtr->numSpare; r++) {
2514 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2515 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2516 rf_close_component(raidPtr, vp, acd);
2517 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2518 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2519 }
2520 }
2521
2522
/*
 * Kernel-thread body: fail the requested component and (if the
 * request asks for it) reconstruct its contents onto a spare.
 * Frees the request and exits the thread when done.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();	/* block disk interrupts for the duration */
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* RF_FDFLAGS_RECON selects fail-with-reconstruction */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request was allocated by our creator; we own it now */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2544
/*
 * Kernel-thread body: rewrite all parity for the set.  On success the
 * in-core parity state is marked clean; shutdown waiters (see
 * waitShutdown) are woken when we finish either way.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();	/* block disk interrupts during the rewrite */
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2574
2575
/*
 * Kernel-thread body: copy reconstructed data back from the spare(s)
 * onto replaced components, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();	/* block disk interrupts during the copyback */
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2590
2591
/*
 * Kernel-thread body: reconstruct the given column in place (onto the
 * same component slot, e.g. after a disk replacement).  Frees the
 * request and exits the thread when done.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();	/* block disk interrupts for the duration */
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* the request was allocated by our creator; we own it now */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2609
2610 RF_AutoConfig_t *
2611 rf_find_raid_components()
2612 {
2613 struct vnode *vp;
2614 struct disklabel label;
2615 struct device *dv;
2616 dev_t dev;
2617 int bmajor;
2618 int error;
2619 int i;
2620 int good_one;
2621 RF_ComponentLabel_t *clabel;
2622 RF_AutoConfig_t *ac_list;
2623 RF_AutoConfig_t *ac;
2624
2625
2626 /* initialize the AutoConfig list */
2627 ac_list = NULL;
2628
2629 /* we begin by trolling through *all* the devices on the system */
2630
2631 for (dv = alldevs.tqh_first; dv != NULL;
2632 dv = dv->dv_list.tqe_next) {
2633
2634 /* we are only interested in disks... */
2635 if (dv->dv_class != DV_DISK)
2636 continue;
2637
2638 /* we don't care about floppies... */
2639 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
2640 continue;
2641 }
2642
2643 /* we don't care about CD's... */
2644 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
2645 continue;
2646 }
2647
2648 /* hdfd is the Atari/Hades floppy driver */
2649 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
2650 continue;
2651 }
2652 /* fdisa is the Atari/Milan floppy driver */
2653 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
2654 continue;
2655 }
2656
2657 /* need to find the device_name_to_block_device_major stuff */
2658 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2659
2660 /* get a vnode for the raw partition of this disk */
2661
2662 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2663 if (bdevvp(dev, &vp))
2664 panic("RAID can't alloc vnode");
2665
2666 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2667
2668 if (error) {
2669 /* "Who cares." Continue looking
2670 for something that exists*/
2671 vput(vp);
2672 continue;
2673 }
2674
2675 /* Ok, the disk exists. Go get the disklabel. */
2676 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2677 if (error) {
2678 /*
2679 * XXX can't happen - open() would
2680 * have errored out (or faked up one)
2681 */
2682 if (error != ENOTTY)
2683 printf("RAIDframe: can't get label for dev "
2684 "%s (%d)\n", dv->dv_xname, error);
2685 }
2686
2687 /* don't need this any more. We'll allocate it again
2688 a little later if we really do... */
2689 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2690 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2691 vput(vp);
2692
2693 if (error)
2694 continue;
2695
2696 for (i=0; i < label.d_npartitions; i++) {
2697 /* We only support partitions marked as RAID */
2698 if (label.d_partitions[i].p_fstype != FS_RAID)
2699 continue;
2700
2701 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2702 if (bdevvp(dev, &vp))
2703 panic("RAID can't alloc vnode");
2704
2705 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2706 if (error) {
2707 /* Whatever... */
2708 vput(vp);
2709 continue;
2710 }
2711
2712 good_one = 0;
2713
2714 clabel = (RF_ComponentLabel_t *)
2715 malloc(sizeof(RF_ComponentLabel_t),
2716 M_RAIDFRAME, M_NOWAIT);
2717 if (clabel == NULL) {
2718 /* XXX CLEANUP HERE */
2719 printf("RAID auto config: out of memory!\n");
2720 return(NULL); /* XXX probably should panic? */
2721 }
2722
2723 if (!raidread_component_label(dev, vp, clabel)) {
2724 /* Got the label. Does it look reasonable? */
2725 if (rf_reasonable_label(clabel) &&
2726 (clabel->partitionSize <=
2727 label.d_partitions[i].p_size)) {
2728 #if DEBUG
2729 printf("Component on: %s%c: %d\n",
2730 dv->dv_xname, 'a'+i,
2731 label.d_partitions[i].p_size);
2732 rf_print_component_label(clabel);
2733 #endif
2734 /* if it's reasonable, add it,
2735 else ignore it. */
2736 ac = (RF_AutoConfig_t *)
2737 malloc(sizeof(RF_AutoConfig_t),
2738 M_RAIDFRAME,
2739 M_NOWAIT);
2740 if (ac == NULL) {
2741 /* XXX should panic?? */
2742 return(NULL);
2743 }
2744
2745 snprintf(ac->devname,
2746 sizeof(ac->devname), "%s%c",
2747 dv->dv_xname, 'a'+i);
2748 ac->dev = dev;
2749 ac->vp = vp;
2750 ac->clabel = clabel;
2751 ac->next = ac_list;
2752 ac_list = ac;
2753 good_one = 1;
2754 }
2755 }
2756 if (!good_one) {
2757 /* cleanup */
2758 free(clabel, M_RAIDFRAME);
2759 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2760 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2761 vput(vp);
2762 }
2763 }
2764 }
2765 return(ac_list);
2766 }
2767
2768 static int
2769 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2770 {
2771
2772 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2773 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2774 ((clabel->clean == RF_RAID_CLEAN) ||
2775 (clabel->clean == RF_RAID_DIRTY)) &&
2776 clabel->row >=0 &&
2777 clabel->column >= 0 &&
2778 clabel->num_rows > 0 &&
2779 clabel->num_columns > 0 &&
2780 clabel->row < clabel->num_rows &&
2781 clabel->column < clabel->num_columns &&
2782 clabel->blockSize > 0 &&
2783 clabel->numBlocks > 0) {
2784 /* label looks reasonable enough... */
2785 return(1);
2786 }
2787 return(0);
2788 }
2789
2790
#if DEBUG
/*
 * Debug aid: dump every interesting field of a component label to the
 * console in human-readable form.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2818
2819 RF_ConfigSet_t *
2820 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2821 {
2822 RF_AutoConfig_t *ac;
2823 RF_ConfigSet_t *config_sets;
2824 RF_ConfigSet_t *cset;
2825 RF_AutoConfig_t *ac_next;
2826
2827
2828 config_sets = NULL;
2829
2830 /* Go through the AutoConfig list, and figure out which components
2831 belong to what sets. */
2832 ac = ac_list;
2833 while(ac!=NULL) {
2834 /* we're going to putz with ac->next, so save it here
2835 for use at the end of the loop */
2836 ac_next = ac->next;
2837
2838 if (config_sets == NULL) {
2839 /* will need at least this one... */
2840 config_sets = (RF_ConfigSet_t *)
2841 malloc(sizeof(RF_ConfigSet_t),
2842 M_RAIDFRAME, M_NOWAIT);
2843 if (config_sets == NULL) {
2844 panic("rf_create_auto_sets: No memory!");
2845 }
2846 /* this one is easy :) */
2847 config_sets->ac = ac;
2848 config_sets->next = NULL;
2849 config_sets->rootable = 0;
2850 ac->next = NULL;
2851 } else {
2852 /* which set does this component fit into? */
2853 cset = config_sets;
2854 while(cset!=NULL) {
2855 if (rf_does_it_fit(cset, ac)) {
2856 /* looks like it matches... */
2857 ac->next = cset->ac;
2858 cset->ac = ac;
2859 break;
2860 }
2861 cset = cset->next;
2862 }
2863 if (cset==NULL) {
2864 /* didn't find a match above... new set..*/
2865 cset = (RF_ConfigSet_t *)
2866 malloc(sizeof(RF_ConfigSet_t),
2867 M_RAIDFRAME, M_NOWAIT);
2868 if (cset == NULL) {
2869 panic("rf_create_auto_sets: No memory!");
2870 }
2871 cset->ac = ac;
2872 ac->next = NULL;
2873 cset->next = config_sets;
2874 cset->rootable = 0;
2875 config_sets = cset;
2876 }
2877 }
2878 ac = ac_next;
2879 }
2880
2881
2882 return(config_sets);
2883 }
2884
2885 static int
2886 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2887 {
2888 RF_ComponentLabel_t *clabel1, *clabel2;
2889
2890 /* If this one matches the *first* one in the set, that's good
2891 enough, since the other members of the set would have been
2892 through here too... */
2893 /* note that we are not checking partitionSize here..
2894
2895 Note that we are also not checking the mod_counters here.
2896 If everything else matches execpt the mod_counter, that's
2897 good enough for this test. We will deal with the mod_counters
2898 a little later in the autoconfiguration process.
2899
2900 (clabel1->mod_counter == clabel2->mod_counter) &&
2901
2902 The reason we don't check for this is that failed disks
2903 will have lower modification counts. If those disks are
2904 not added to the set they used to belong to, then they will
2905 form their own set, which may result in 2 different sets,
2906 for example, competing to be configured at raid0, and
2907 perhaps competing to be the root filesystem set. If the
2908 wrong ones get configured, or both attempt to become /,
2909 weird behaviour and or serious lossage will occur. Thus we
2910 need to bring them into the fold here, and kick them out at
2911 a later point.
2912
2913 */
2914
2915 clabel1 = cset->ac->clabel;
2916 clabel2 = ac->clabel;
2917 if ((clabel1->version == clabel2->version) &&
2918 (clabel1->serial_number == clabel2->serial_number) &&
2919 (clabel1->num_rows == clabel2->num_rows) &&
2920 (clabel1->num_columns == clabel2->num_columns) &&
2921 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2922 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2923 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2924 (clabel1->parityConfig == clabel2->parityConfig) &&
2925 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2926 (clabel1->blockSize == clabel2->blockSize) &&
2927 (clabel1->numBlocks == clabel2->numBlocks) &&
2928 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2929 (clabel1->root_partition == clabel2->root_partition) &&
2930 (clabel1->last_unit == clabel2->last_unit) &&
2931 (clabel1->config_order == clabel2->config_order)) {
2932 /* if it get's here, it almost *has* to be a match */
2933 } else {
2934 /* it's not consistent with somebody in the set..
2935 punt */
2936 return(0);
2937 }
2938 /* all was fine.. it must fit... */
2939 return(1);
2940 }
2941
/*
 * Decide whether a configuration set has enough live components to be
 * configured.  Components whose mod_counter is stale (lower than the
 * newest in the set) are treated as missing.  RAID 1 gets special
 * pairwise accounting; other levels use a simple missing-count
 * threshold.  Returns 1 if the set is configurable, 0 if not.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the authoritative mod_counter is the highest one present */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* walk every column and see if a current component covers it */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of a mirror
						   pair are gone -- data is
						   unrecoverable */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3044
3045 void
3046 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3047 RF_Raid_t *raidPtr)
3048 {
3049 RF_ComponentLabel_t *clabel;
3050 int i;
3051
3052 clabel = ac->clabel;
3053
3054 /* 1. Fill in the common stuff */
3055 config->numRow = clabel->num_rows = 1;
3056 config->numCol = clabel->num_columns;
3057 config->numSpare = 0; /* XXX should this be set here? */
3058 config->sectPerSU = clabel->sectPerSU;
3059 config->SUsPerPU = clabel->SUsPerPU;
3060 config->SUsPerRU = clabel->SUsPerRU;
3061 config->parityConfig = clabel->parityConfig;
3062 /* XXX... */
3063 strcpy(config->diskQueueType,"fifo");
3064 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3065 config->layoutSpecificSize = 0; /* XXX ?? */
3066
3067 while(ac!=NULL) {
3068 /* row/col values will be in range due to the checks
3069 in reasonable_label() */
3070 strcpy(config->devnames[0][ac->clabel->column],
3071 ac->devname);
3072 ac = ac->next;
3073 }
3074
3075 for(i=0;i<RF_MAXDBGV;i++) {
3076 config->debugVars[i][0] = 0;
3077 }
3078 }
3079
3080 int
3081 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3082 {
3083 RF_ComponentLabel_t clabel;
3084 struct vnode *vp;
3085 dev_t dev;
3086 int column;
3087 int sparecol;
3088
3089 raidPtr->autoconfigure = new_value;
3090
3091 for(column=0; column<raidPtr->numCol; column++) {
3092 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3093 dev = raidPtr->Disks[column].dev;
3094 vp = raidPtr->raid_cinfo[column].ci_vp;
3095 raidread_component_label(dev, vp, &clabel);
3096 clabel.autoconfigure = new_value;
3097 raidwrite_component_label(dev, vp, &clabel);
3098 }
3099 }
3100 for(column = 0; column < raidPtr->numSpare ; column++) {
3101 sparecol = raidPtr->numCol + column;
3102 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3103 dev = raidPtr->Disks[sparecol].dev;
3104 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3105 raidread_component_label(dev, vp, &clabel);
3106 clabel.autoconfigure = new_value;
3107 raidwrite_component_label(dev, vp, &clabel);
3108 }
3109 }
3110 return(new_value);
3111 }
3112
3113 int
3114 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3115 {
3116 RF_ComponentLabel_t clabel;
3117 struct vnode *vp;
3118 dev_t dev;
3119 int column;
3120 int sparecol;
3121
3122 raidPtr->root_partition = new_value;
3123 for(column=0; column<raidPtr->numCol; column++) {
3124 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3125 dev = raidPtr->Disks[column].dev;
3126 vp = raidPtr->raid_cinfo[column].ci_vp;
3127 raidread_component_label(dev, vp, &clabel);
3128 clabel.root_partition = new_value;
3129 raidwrite_component_label(dev, vp, &clabel);
3130 }
3131 }
3132 for(column = 0; column < raidPtr->numSpare ; column++) {
3133 sparecol = raidPtr->numCol + column;
3134 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3135 dev = raidPtr->Disks[sparecol].dev;
3136 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3137 raidread_component_label(dev, vp, &clabel);
3138 clabel.root_partition = new_value;
3139 raidwrite_component_label(dev, vp, &clabel);
3140 }
3141 }
3142 return(new_value);
3143 }
3144
3145 void
3146 rf_release_all_vps(RF_ConfigSet_t *cset)
3147 {
3148 RF_AutoConfig_t *ac;
3149
3150 ac = cset->ac;
3151 while(ac!=NULL) {
3152 /* Close the vp, and give it back */
3153 if (ac->vp) {
3154 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3155 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3156 vput(ac->vp);
3157 ac->vp = NULL;
3158 }
3159 ac = ac->next;
3160 }
3161 }
3162
3163
3164 void
3165 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3166 {
3167 RF_AutoConfig_t *ac;
3168 RF_AutoConfig_t *next_ac;
3169
3170 ac = cset->ac;
3171 while(ac!=NULL) {
3172 next_ac = ac->next;
3173 /* nuke the label */
3174 free(ac->clabel, M_RAIDFRAME);
3175 /* cleanup the config structure */
3176 free(ac, M_RAIDFRAME);
3177 /* "next.." */
3178 ac = next_ac;
3179 }
3180 /* and, finally, nuke the config set */
3181 free(cset, M_RAIDFRAME);
3182 }
3183
3184
3185 void
3186 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3187 {
3188 /* current version number */
3189 clabel->version = RF_COMPONENT_LABEL_VERSION;
3190 clabel->serial_number = raidPtr->serial_number;
3191 clabel->mod_counter = raidPtr->mod_counter;
3192 clabel->num_rows = 1;
3193 clabel->num_columns = raidPtr->numCol;
3194 clabel->clean = RF_RAID_DIRTY; /* not clean */
3195 clabel->status = rf_ds_optimal; /* "It's good!" */
3196
3197 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3198 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3199 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3200
3201 clabel->blockSize = raidPtr->bytesPerSector;
3202 clabel->numBlocks = raidPtr->sectorsPerDisk;
3203
3204 /* XXX not portable */
3205 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3206 clabel->maxOutstanding = raidPtr->maxOutstanding;
3207 clabel->autoconfigure = raidPtr->autoconfigure;
3208 clabel->root_partition = raidPtr->root_partition;
3209 clabel->last_unit = raidPtr->raidid;
3210 clabel->config_order = raidPtr->config_order;
3211 }
3212
3213 int
3214 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3215 {
3216 RF_Raid_t *raidPtr;
3217 RF_Config_t *config;
3218 int raidID;
3219 int retcode;
3220
3221 #if DEBUG
3222 printf("RAID autoconfigure\n");
3223 #endif
3224
3225 retcode = 0;
3226 *unit = -1;
3227
3228 /* 1. Create a config structure */
3229
3230 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3231 M_RAIDFRAME,
3232 M_NOWAIT);
3233 if (config==NULL) {
3234 printf("Out of mem!?!?\n");
3235 /* XXX do something more intelligent here. */
3236 return(1);
3237 }
3238
3239 memset(config, 0, sizeof(RF_Config_t));
3240
3241 /*
3242 2. Figure out what RAID ID this one is supposed to live at
3243 See if we can get the same RAID dev that it was configured
3244 on last time..
3245 */
3246
3247 raidID = cset->ac->clabel->last_unit;
3248 if ((raidID < 0) || (raidID >= numraid)) {
3249 /* let's not wander off into lala land. */
3250 raidID = numraid - 1;
3251 }
3252 if (raidPtrs[raidID]->valid != 0) {
3253
3254 /*
3255 Nope... Go looking for an alternative...
3256 Start high so we don't immediately use raid0 if that's
3257 not taken.
3258 */
3259
3260 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3261 if (raidPtrs[raidID]->valid == 0) {
3262 /* can use this one! */
3263 break;
3264 }
3265 }
3266 }
3267
3268 if (raidID < 0) {
3269 /* punt... */
3270 printf("Unable to auto configure this set!\n");
3271 printf("(Out of RAID devs!)\n");
3272 return(1);
3273 }
3274
3275 #if DEBUG
3276 printf("Configuring raid%d:\n",raidID);
3277 #endif
3278
3279 raidPtr = raidPtrs[raidID];
3280
3281 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3282 raidPtr->raidid = raidID;
3283 raidPtr->openings = RAIDOUTSTANDING;
3284
3285 /* 3. Build the configuration structure */
3286 rf_create_configuration(cset->ac, config, raidPtr);
3287
3288 /* 4. Do the configuration */
3289 retcode = rf_Configure(raidPtr, config, cset->ac);
3290
3291 if (retcode == 0) {
3292
3293 raidinit(raidPtrs[raidID]);
3294
3295 rf_markalldirty(raidPtrs[raidID]);
3296 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3297 if (cset->ac->clabel->root_partition==1) {
3298 /* everything configured just fine. Make a note
3299 that this set is eligible to be root. */
3300 cset->rootable = 1;
3301 /* XXX do this here? */
3302 raidPtrs[raidID]->root_partition = 1;
3303 }
3304 }
3305
3306 /* 5. Cleanup */
3307 free(config, M_RAIDFRAME);
3308
3309 *unit = raidID;
3310 return(retcode);
3311 }
3312
3313 void
3314 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3315 {
3316 struct buf *bp;
3317
3318 bp = (struct buf *)desc->bp;
3319 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3320 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3321 }
3322
/*
 * Initialize the pool p with items of the given size, pre-allocate
 * xmin items, and set the low/high watermarks to xmin/xmax.
 * NOTE(review): the call order (init, sethiwat, prime, setlowat)
 * is preserved as-is; priming before setting the low watermark
 * appears deliberate -- confirm against pool(9) before reordering.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3332