rf_netbsdkintf.c revision 1.211 1 /* $NetBSD: rf_netbsdkintf.c,v 1.211 2006/08/07 17:08:18 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.211 2006/08/07 17:08:18 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, e.g.
 *	db1_printf(("value=%d\n", v));
 * With DEBUG defined the message is printed when rf_kdebug_level > 0;
 * otherwise the macro expands to nothing.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement: it cannot capture a following `else', and
 * `if (x) db1_printf((...)); else ...' compiles in either configuration.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
/*
 * Block-device switch for the raid driver.  The entries are the open,
 * close, strategy, ioctl, dump and size handlers declared above, and the
 * device is flagged as a disk (D_DISK).
 */
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
/*
 * Character-device switch for the raid driver.  open, close, read, write
 * and ioctl are handled by this file; the stop, tty, poll, mmap and
 * kqfilter slots are filled with the no* placeholders.  Flagged D_DISK.
 */
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /* XXX Not sure if the following should be replacing the raidPtrs above,
238 or if it should be used in conjunction with that...
239 */
240
/*
 * Per-unit driver state.  raidattach() allocates one zero-filled entry
 * per unit in the raid_softc array; the entry points index that array
 * with raidunit(dev).
 */
241 struct raid_softc {
242 int sc_flags; /* RAIDF_* state flags (see "sc_flags" defines) */
243 int sc_cflags; /* configuration flags */
244 size_t sc_size; /* size of the raid device */
245 char sc_xname[20]; /* XXX external name */
246 struct disk sc_dkdev; /* generic disk device info (label, open masks) */
247 struct bufq_state *buf_queue; /* device queue; raidstrategy() puts bufs here */
248 };
249 /* sc_flags */
250 #define RAIDF_INITED 0x01 /* unit has been initialized */
251 #define RAIDF_WLABEL 0x02 /* label area is writable */
252 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
253 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
254 #define RAIDF_LOCKED 0x80 /* unit is locked */
255
256 #define raidunit(x) DISKUNIT(x)
257 int numraid = 0;
258
259 extern struct cfdriver raid_cd;
260
261 /*
262 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
263 * Be aware that large numbers can allow the driver to consume a lot of
264 * kernel memory, especially on writes, and in degraded mode reads.
265 *
266 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
267 * a single 64K write will typically require 64K for the old data,
268 * 64K for the old parity, and 64K for the new parity, for a total
269 * of 192K (if the parity buffer is not re-used immediately).
270 * Even if it is used immediately, that's still 128K, which when multiplied
271 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
272 *
273 * Now in degraded mode, for example, a 64K read on the above setup may
274 * require data reconstruction, which will require *all* of the 4 remaining
275 * disks to participate -- 4 * 32K/disk == 128K again.
276 */
277
278 #ifndef RAIDOUTSTANDING
279 #define RAIDOUTSTANDING 6
280 #endif
281
/*
 * Build the dev_t of the raw partition (RAW_PART) of the unit that `dev'
 * refers to, keeping dev's major number.
 */
282 #define RAIDLABELDEV(dev) \
283 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
284
285 /* declared here, and made public, for the benefit of KVM stuff.. */
286 struct raid_softc *raid_softc;
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static void rf_markalldirty(RF_Raid_t *);
297
298 struct device *raidrootdev;
299
300 void rf_ReconThread(struct rf_recon_req *);
301 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
302 void rf_CopybackThread(RF_Raid_t *raidPtr);
303 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
304 int rf_autoconfig(struct device *self);
305 void rf_buildroothack(RF_ConfigSet_t *);
306
307 RF_AutoConfig_t *rf_find_raid_components(void);
308 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
310 static int rf_reasonable_label(RF_ComponentLabel_t *);
311 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
312 int rf_set_autoconfig(RF_Raid_t *, int);
313 int rf_set_rootpartition(RF_Raid_t *, int);
314 void rf_release_all_vps(RF_ConfigSet_t *);
315 void rf_cleanup_config_set(RF_ConfigSet_t *);
316 int rf_have_enough_components(RF_ConfigSet_t *);
317 int rf_auto_config_set(RF_ConfigSet_t *, int *);
318
319 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
320 allow autoconfig to take place.
321 Note that this is overridden by having
322 RAID_AUTOCONFIG as an option in the
323 kernel config file. */
324
325 struct RF_Pools_s rf_pools;
326
327 void
328 raidattach(int num)
329 {
330 int raidID;
331 int i, rc;
332
333 #ifdef DEBUG
334 printf("raidattach: Asked for %d units\n", num);
335 #endif
336
337 if (num <= 0) {
338 #ifdef DIAGNOSTIC
339 panic("raidattach: count <= 0");
340 #endif
341 return;
342 }
343 /* This is where all the initialization stuff gets done. */
344
345 numraid = num;
346
347 /* Make some space for requested number of units... */
348
349 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
350 if (raidPtrs == NULL) {
351 panic("raidPtrs is NULL!!");
352 }
353
354 rf_mutex_init(&rf_sparet_wait_mutex);
355
356 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
357
358 for (i = 0; i < num; i++)
359 raidPtrs[i] = NULL;
360 rc = rf_BootRaidframe();
361 if (rc == 0)
362 printf("Kernelized RAIDframe activated\n");
363 else
364 panic("Serious error booting RAID!!");
365
366 /* put together some datastructures like the CCD device does.. This
367 * lets us lock the device and what-not when it gets opened. */
368
369 raid_softc = (struct raid_softc *)
370 malloc(num * sizeof(struct raid_softc),
371 M_RAIDFRAME, M_NOWAIT);
372 if (raid_softc == NULL) {
373 printf("WARNING: no memory for RAIDframe driver\n");
374 return;
375 }
376
377 memset(raid_softc, 0, num * sizeof(struct raid_softc));
378
379 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
380 M_RAIDFRAME, M_NOWAIT);
381 if (raidrootdev == NULL) {
382 panic("No memory for RAIDframe driver!!?!?!");
383 }
384
385 for (raidID = 0; raidID < num; raidID++) {
386 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
387 pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
388
389 /* XXXJRT Should use config_attach_pseudo() */
390
391 raidrootdev[raidID].dv_class = DV_DISK;
392 raidrootdev[raidID].dv_cfdata = NULL;
393 raidrootdev[raidID].dv_unit = raidID;
394 raidrootdev[raidID].dv_parent = NULL;
395 raidrootdev[raidID].dv_flags = 0;
396 raidrootdev[raidID].dv_cfdriver = &raid_cd;
397 snprintf(raidrootdev[raidID].dv_xname,
398 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
399
400 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
401 (RF_Raid_t *));
402 if (raidPtrs[raidID] == NULL) {
403 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
404 numraid = raidID;
405 return;
406 }
407 }
408
409 #ifdef RAID_AUTOCONFIG
410 raidautoconfig = 1;
411 #endif
412
413 /*
414 * Register a finalizer which will be used to auto-config RAID
415 * sets once all real hardware devices have been found.
416 */
417 if (config_finalize_register(NULL, rf_autoconfig) != 0)
418 printf("WARNING: unable to register RAIDframe finalizer\n");
419 }
420
421 int
422 rf_autoconfig(struct device *self)
423 {
424 RF_AutoConfig_t *ac_list;
425 RF_ConfigSet_t *config_sets;
426
427 if (raidautoconfig == 0)
428 return (0);
429
430 /* XXX This code can only be run once. */
431 raidautoconfig = 0;
432
433 /* 1. locate all RAID components on the system */
434 #ifdef DEBUG
435 printf("Searching for RAID components...\n");
436 #endif
437 ac_list = rf_find_raid_components();
438
439 /* 2. Sort them into their respective sets. */
440 config_sets = rf_create_auto_sets(ac_list);
441
442 /*
443 * 3. Evaluate each set andconfigure the valid ones.
444 * This gets done in rf_buildroothack().
445 */
446 rf_buildroothack(config_sets);
447
448 return (1);
449 }
450
451 void
452 rf_buildroothack(RF_ConfigSet_t *config_sets)
453 {
454 RF_ConfigSet_t *cset;
455 RF_ConfigSet_t *next_cset;
456 int retcode;
457 int raidID;
458 int rootID;
459 int num_root;
460
461 rootID = 0;
462 num_root = 0;
463 cset = config_sets;
464 while(cset != NULL ) {
465 next_cset = cset->next;
466 if (rf_have_enough_components(cset) &&
467 cset->ac->clabel->autoconfigure==1) {
468 retcode = rf_auto_config_set(cset,&raidID);
469 if (!retcode) {
470 if (cset->rootable) {
471 rootID = raidID;
472 num_root++;
473 }
474 } else {
475 /* The autoconfig didn't work :( */
476 #if DEBUG
477 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
478 #endif
479 rf_release_all_vps(cset);
480 }
481 } else {
482 /* we're not autoconfiguring this set...
483 release the associated resources */
484 rf_release_all_vps(cset);
485 }
486 /* cleanup */
487 rf_cleanup_config_set(cset);
488 cset = next_cset;
489 }
490
491 /* we found something bootable... */
492
493 if (num_root == 1) {
494 booted_device = &raidrootdev[rootID];
495 } else if (num_root > 1) {
496 /* we can't guess.. require the user to answer... */
497 boothowto |= RB_ASKNAME;
498 }
499 }
500
501
502 int
503 raidsize(dev_t dev)
504 {
505 struct raid_softc *rs;
506 struct disklabel *lp;
507 int part, unit, omask, size;
508
509 unit = raidunit(dev);
510 if (unit >= numraid)
511 return (-1);
512 rs = &raid_softc[unit];
513
514 if ((rs->sc_flags & RAIDF_INITED) == 0)
515 return (-1);
516
517 part = DISKPART(dev);
518 omask = rs->sc_dkdev.dk_openmask & (1 << part);
519 lp = rs->sc_dkdev.dk_label;
520
521 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
522 return (-1);
523
524 if (lp->d_partitions[part].p_fstype != FS_SWAP)
525 size = -1;
526 else
527 size = lp->d_partitions[part].p_size *
528 (lp->d_secsize / DEV_BSIZE);
529
530 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
531 return (-1);
532
533 return (size);
534
535 }
536
537 int
538 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
539 {
540 /* Not implemented. */
541 return ENXIO;
542 }
543 /* ARGSUSED */
544 int
545 raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
546 {
547 int unit = raidunit(dev);
548 struct raid_softc *rs;
549 struct disklabel *lp;
550 int part, pmask;
551 int error = 0;
552
553 if (unit >= numraid)
554 return (ENXIO);
555 rs = &raid_softc[unit];
556
557 if ((error = raidlock(rs)) != 0)
558 return (error);
559 lp = rs->sc_dkdev.dk_label;
560
561 part = DISKPART(dev);
562 pmask = (1 << part);
563
564 if ((rs->sc_flags & RAIDF_INITED) &&
565 (rs->sc_dkdev.dk_openmask == 0))
566 raidgetdisklabel(dev);
567
568 /* make sure that this partition exists */
569
570 if (part != RAW_PART) {
571 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
572 ((part >= lp->d_npartitions) ||
573 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
574 error = ENXIO;
575 raidunlock(rs);
576 return (error);
577 }
578 }
579 /* Prevent this unit from being unconfigured while open. */
580 switch (fmt) {
581 case S_IFCHR:
582 rs->sc_dkdev.dk_copenmask |= pmask;
583 break;
584
585 case S_IFBLK:
586 rs->sc_dkdev.dk_bopenmask |= pmask;
587 break;
588 }
589
590 if ((rs->sc_dkdev.dk_openmask == 0) &&
591 ((rs->sc_flags & RAIDF_INITED) != 0)) {
592 /* First one... mark things as dirty... Note that we *MUST*
593 have done a configure before this. I DO NOT WANT TO BE
594 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
595 THAT THEY BELONG TOGETHER!!!!! */
596 /* XXX should check to see if we're only open for reading
597 here... If so, we needn't do this, but then need some
598 other way of keeping track of what's happened.. */
599
600 rf_markalldirty( raidPtrs[unit] );
601 }
602
603
604 rs->sc_dkdev.dk_openmask =
605 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
606
607 raidunlock(rs);
608
609 return (error);
610
611
612 }
613 /* ARGSUSED */
614 int
615 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
616 {
617 int unit = raidunit(dev);
618 struct raid_softc *rs;
619 int error = 0;
620 int part;
621
622 if (unit >= numraid)
623 return (ENXIO);
624 rs = &raid_softc[unit];
625
626 if ((error = raidlock(rs)) != 0)
627 return (error);
628
629 part = DISKPART(dev);
630
631 /* ...that much closer to allowing unconfiguration... */
632 switch (fmt) {
633 case S_IFCHR:
634 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
635 break;
636
637 case S_IFBLK:
638 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
639 break;
640 }
641 rs->sc_dkdev.dk_openmask =
642 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
643
644 if ((rs->sc_dkdev.dk_openmask == 0) &&
645 ((rs->sc_flags & RAIDF_INITED) != 0)) {
646 /* Last one... device is not unconfigured yet.
647 Device shutdown has taken care of setting the
648 clean bits if RAIDF_INITED is not set
649 mark things as clean... */
650
651 rf_update_component_labels(raidPtrs[unit],
652 RF_FINAL_COMPONENT_UPDATE);
653 if (doing_shutdown) {
654 /* last one, and we're going down, so
655 lights out for this RAID set too. */
656 error = rf_Shutdown(raidPtrs[unit]);
657
658 /* It's no longer initialized... */
659 rs->sc_flags &= ~RAIDF_INITED;
660
661 /* Detach the disk. */
662 pseudo_disk_detach(&rs->sc_dkdev);
663 }
664 }
665
666 raidunlock(rs);
667 return (0);
668
669 }
670
671 void
672 raidstrategy(struct buf *bp)
673 {
674 int s;
675
676 unsigned int raidID = raidunit(bp->b_dev);
677 RF_Raid_t *raidPtr;
678 struct raid_softc *rs = &raid_softc[raidID];
679 int wlabel;
680
681 if ((rs->sc_flags & RAIDF_INITED) ==0) {
682 bp->b_error = ENXIO;
683 bp->b_flags |= B_ERROR;
684 goto done;
685 }
686 if (raidID >= numraid || !raidPtrs[raidID]) {
687 bp->b_error = ENODEV;
688 bp->b_flags |= B_ERROR;
689 goto done;
690 }
691 raidPtr = raidPtrs[raidID];
692 if (!raidPtr->valid) {
693 bp->b_error = ENODEV;
694 bp->b_flags |= B_ERROR;
695 goto done;
696 }
697 if (bp->b_bcount == 0) {
698 db1_printf(("b_bcount is zero..\n"));
699 goto done;
700 }
701
702 /*
703 * Do bounds checking and adjust transfer. If there's an
704 * error, the bounds check will flag that for us.
705 */
706
707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
708 if (DISKPART(bp->b_dev) == RAW_PART) {
709 uint64_t size; /* device size in DEV_BSIZE unit */
710
711 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
712 size = raidPtr->totalSectors <<
713 (raidPtr->logBytesPerSector - DEV_BSHIFT);
714 } else {
715 size = raidPtr->totalSectors >>
716 (DEV_BSHIFT - raidPtr->logBytesPerSector);
717 }
718 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
719 goto done;
720 }
721 } else {
722 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
723 db1_printf(("Bounds check failed!!:%d %d\n",
724 (int) bp->b_blkno, (int) wlabel));
725 goto done;
726 }
727 }
728 s = splbio();
729
730 bp->b_resid = 0;
731
732 /* stuff it onto our queue */
733 BUFQ_PUT(rs->buf_queue, bp);
734
735 /* scheduled the IO to happen at the next convenient time */
736 wakeup(&(raidPtrs[raidID]->iodone));
737
738 splx(s);
739 return;
740
741 done:
742 bp->b_resid = bp->b_bcount;
743 biodone(bp);
744 }
745 /* ARGSUSED */
746 int
747 raidread(dev_t dev, struct uio *uio, int flags)
748 {
749 int unit = raidunit(dev);
750 struct raid_softc *rs;
751
752 if (unit >= numraid)
753 return (ENXIO);
754 rs = &raid_softc[unit];
755
756 if ((rs->sc_flags & RAIDF_INITED) == 0)
757 return (ENXIO);
758
759 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
760
761 }
762 /* ARGSUSED */
763 int
764 raidwrite(dev_t dev, struct uio *uio, int flags)
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775
776 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
777
778 }
779
780 int
781 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
782 {
783 int unit = raidunit(dev);
784 int error = 0;
785 int part, pmask;
786 struct raid_softc *rs;
787 RF_Config_t *k_cfg, *u_cfg;
788 RF_Raid_t *raidPtr;
789 RF_RaidDisk_t *diskPtr;
790 RF_AccTotals_t *totals;
791 RF_DeviceConfig_t *d_cfg, **ucfgp;
792 u_char *specific_buf;
793 int retcode = 0;
794 int column;
795 int raidid;
796 struct rf_recon_req *rrcopy, *rr;
797 RF_ComponentLabel_t *clabel;
798 RF_ComponentLabel_t *ci_label;
799 RF_ComponentLabel_t **clabel_ptr;
800 RF_SingleComponent_t *sparePtr,*componentPtr;
801 RF_SingleComponent_t component;
802 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
803 int i, j, d;
804 #ifdef __HAVE_OLD_DISKLABEL
805 struct disklabel newlabel;
806 #endif
807
808 if (unit >= numraid)
809 return (ENXIO);
810 rs = &raid_softc[unit];
811 raidPtr = raidPtrs[unit];
812
813 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
814 (int) DISKPART(dev), (int) unit, (int) cmd));
815
816 /* Must be open for writes for these commands... */
817 switch (cmd) {
818 case DIOCSDINFO:
819 case DIOCWDINFO:
820 #ifdef __HAVE_OLD_DISKLABEL
821 case ODIOCWDINFO:
822 case ODIOCSDINFO:
823 #endif
824 case DIOCWLABEL:
825 if ((flag & FWRITE) == 0)
826 return (EBADF);
827 }
828
829 /* Must be initialized for these... */
830 switch (cmd) {
831 case DIOCGDINFO:
832 case DIOCSDINFO:
833 case DIOCWDINFO:
834 #ifdef __HAVE_OLD_DISKLABEL
835 case ODIOCGDINFO:
836 case ODIOCWDINFO:
837 case ODIOCSDINFO:
838 case ODIOCGDEFLABEL:
839 #endif
840 case DIOCGPART:
841 case DIOCWLABEL:
842 case DIOCGDEFLABEL:
843 case RAIDFRAME_SHUTDOWN:
844 case RAIDFRAME_REWRITEPARITY:
845 case RAIDFRAME_GET_INFO:
846 case RAIDFRAME_RESET_ACCTOTALS:
847 case RAIDFRAME_GET_ACCTOTALS:
848 case RAIDFRAME_KEEP_ACCTOTALS:
849 case RAIDFRAME_GET_SIZE:
850 case RAIDFRAME_FAIL_DISK:
851 case RAIDFRAME_COPYBACK:
852 case RAIDFRAME_CHECK_RECON_STATUS:
853 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
854 case RAIDFRAME_GET_COMPONENT_LABEL:
855 case RAIDFRAME_SET_COMPONENT_LABEL:
856 case RAIDFRAME_ADD_HOT_SPARE:
857 case RAIDFRAME_REMOVE_HOT_SPARE:
858 case RAIDFRAME_INIT_LABELS:
859 case RAIDFRAME_REBUILD_IN_PLACE:
860 case RAIDFRAME_CHECK_PARITY:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS:
864 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
865 case RAIDFRAME_SET_AUTOCONFIG:
866 case RAIDFRAME_SET_ROOT:
867 case RAIDFRAME_DELETE_COMPONENT:
868 case RAIDFRAME_INCORPORATE_HOT_SPARE:
869 if ((rs->sc_flags & RAIDF_INITED) == 0)
870 return (ENXIO);
871 }
872
873 switch (cmd) {
874
875 /* configure the system */
876 case RAIDFRAME_CONFIGURE:
877
878 if (raidPtr->valid) {
879 /* There is a valid RAID set running on this unit! */
880 printf("raid%d: Device already configured!\n",unit);
881 return(EINVAL);
882 }
883
884 /* copy-in the configuration information */
885 /* data points to a pointer to the configuration structure */
886
887 u_cfg = *((RF_Config_t **) data);
888 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
889 if (k_cfg == NULL) {
890 return (ENOMEM);
891 }
892 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
893 if (retcode) {
894 RF_Free(k_cfg, sizeof(RF_Config_t));
895 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
896 retcode));
897 return (retcode);
898 }
899 /* allocate a buffer for the layout-specific data, and copy it
900 * in */
901 if (k_cfg->layoutSpecificSize) {
902 if (k_cfg->layoutSpecificSize > 10000) {
903 /* sanity check */
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (EINVAL);
906 }
907 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
908 (u_char *));
909 if (specific_buf == NULL) {
910 RF_Free(k_cfg, sizeof(RF_Config_t));
911 return (ENOMEM);
912 }
913 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
914 k_cfg->layoutSpecificSize);
915 if (retcode) {
916 RF_Free(k_cfg, sizeof(RF_Config_t));
917 RF_Free(specific_buf,
918 k_cfg->layoutSpecificSize);
919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
920 retcode));
921 return (retcode);
922 }
923 } else
924 specific_buf = NULL;
925 k_cfg->layoutSpecific = specific_buf;
926
927 /* should do some kind of sanity check on the configuration.
928 * Store the sum of all the bytes in the last byte? */
929
930 /* configure the system */
931
932 /*
933 * Clear the entire RAID descriptor, just to make sure
934 * there is no stale data left in the case of a
935 * reconfiguration
936 */
937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
938 raidPtr->raidid = unit;
939
940 retcode = rf_Configure(raidPtr, k_cfg, NULL);
941
942 if (retcode == 0) {
943
944 /* allow this many simultaneous IO's to
945 this RAID device */
946 raidPtr->openings = RAIDOUTSTANDING;
947
948 raidinit(raidPtr);
949 rf_markalldirty(raidPtr);
950 }
951 /* free the buffers. No return code here. */
952 if (k_cfg->layoutSpecificSize) {
953 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
954 }
955 RF_Free(k_cfg, sizeof(RF_Config_t));
956
957 return (retcode);
958
959 /* shutdown the system */
960 case RAIDFRAME_SHUTDOWN:
961
962 if ((error = raidlock(rs)) != 0)
963 return (error);
964
965 /*
966 * If somebody has a partition mounted, we shouldn't
967 * shutdown.
968 */
969
970 part = DISKPART(dev);
971 pmask = (1 << part);
972 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
973 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
974 (rs->sc_dkdev.dk_copenmask & pmask))) {
975 raidunlock(rs);
976 return (EBUSY);
977 }
978
979 retcode = rf_Shutdown(raidPtr);
980
981 /* It's no longer initialized... */
982 rs->sc_flags &= ~RAIDF_INITED;
983
984 /* Detach the disk. */
985 pseudo_disk_detach(&rs->sc_dkdev);
986
987 raidunlock(rs);
988
989 return (retcode);
990 case RAIDFRAME_GET_COMPONENT_LABEL:
991 clabel_ptr = (RF_ComponentLabel_t **) data;
992 /* need to read the component label for the disk indicated
993 by row,column in clabel */
994
995 /* For practice, let's get it directly from disk, rather
996 than from the in-core copy */
997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
998 (RF_ComponentLabel_t *));
999 if (clabel == NULL)
1000 return (ENOMEM);
1001
1002 retcode = copyin( *clabel_ptr, clabel,
1003 sizeof(RF_ComponentLabel_t));
1004
1005 if (retcode) {
1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1007 return(retcode);
1008 }
1009
1010 clabel->row = 0; /* Don't allow looking at anything else.*/
1011
1012 column = clabel->column;
1013
1014 if ((column < 0) || (column >= raidPtr->numCol +
1015 raidPtr->numSpare)) {
1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1017 return(EINVAL);
1018 }
1019
1020 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1021 raidPtr->raid_cinfo[column].ci_vp,
1022 clabel );
1023
1024 if (retcode == 0) {
1025 retcode = copyout(clabel, *clabel_ptr,
1026 sizeof(RF_ComponentLabel_t));
1027 }
1028 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1029 return (retcode);
1030
1031 case RAIDFRAME_SET_COMPONENT_LABEL:
1032 clabel = (RF_ComponentLabel_t *) data;
1033
1034 /* XXX check the label for valid stuff... */
1035 /* Note that some things *should not* get modified --
1036 the user should be re-initing the labels instead of
1037 trying to patch things.
1038 */
1039
1040 raidid = raidPtr->raidid;
1041 #if DEBUG
1042 printf("raid%d: Got component label:\n", raidid);
1043 printf("raid%d: Version: %d\n", raidid, clabel->version);
1044 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1045 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1046 printf("raid%d: Column: %d\n", raidid, clabel->column);
1047 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1048 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1049 printf("raid%d: Status: %d\n", raidid, clabel->status);
1050 #endif
1051 clabel->row = 0;
1052 column = clabel->column;
1053
1054 if ((column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[column].dev,
1065 raidPtr->raid_cinfo[column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1082 (RF_ComponentLabel_t *));
1083 if (ci_label == NULL)
1084 return (ENOMEM);
1085
1086 raid_init_component_label(raidPtr, ci_label);
1087 ci_label->serial_number = clabel->serial_number;
1088 ci_label->row = 0; /* we dont' pretend to support more */
1089
1090 for(column=0;column<raidPtr->numCol;column++) {
1091 diskPtr = &raidPtr->Disks[column];
1092 if (!RF_DEAD_DISK(diskPtr->status)) {
1093 ci_label->partitionSize = diskPtr->partitionSize;
1094 ci_label->column = column;
1095 raidwrite_component_label(
1096 raidPtr->Disks[column].dev,
1097 raidPtr->raid_cinfo[column].ci_vp,
1098 ci_label );
1099 }
1100 }
1101 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1102
1103 return (retcode);
1104 case RAIDFRAME_SET_AUTOCONFIG:
1105 d = rf_set_autoconfig(raidPtr, *(int *) data);
1106 printf("raid%d: New autoconfig value is: %d\n",
1107 raidPtr->raidid, d);
1108 *(int *) data = d;
1109 return (retcode);
1110
1111 case RAIDFRAME_SET_ROOT:
1112 d = rf_set_rootpartition(raidPtr, *(int *) data);
1113 printf("raid%d: New rootpartition value is: %d\n",
1114 raidPtr->raidid, d);
1115 *(int *) data = d;
1116 return (retcode);
1117
1118 /* initialize all parity */
1119 case RAIDFRAME_REWRITEPARITY:
1120
1121 if (raidPtr->Layout.map->faultsTolerated == 0) {
1122 /* Parity for RAID 0 is trivially correct */
1123 raidPtr->parity_good = RF_RAID_CLEAN;
1124 return(0);
1125 }
1126
1127 if (raidPtr->parity_rewrite_in_progress == 1) {
1128 /* Re-write is already in progress! */
1129 return(EINVAL);
1130 }
1131
1132 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1133 rf_RewriteParityThread,
1134 raidPtr,"raid_parity");
1135 return (retcode);
1136
1137
1138 case RAIDFRAME_ADD_HOT_SPARE:
1139 sparePtr = (RF_SingleComponent_t *) data;
1140 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1141 retcode = rf_add_hot_spare(raidPtr, &component);
1142 return(retcode);
1143
1144 case RAIDFRAME_REMOVE_HOT_SPARE:
1145 return(retcode);
1146
1147 case RAIDFRAME_DELETE_COMPONENT:
1148 componentPtr = (RF_SingleComponent_t *)data;
1149 memcpy( &component, componentPtr,
1150 sizeof(RF_SingleComponent_t));
1151 retcode = rf_delete_component(raidPtr, &component);
1152 return(retcode);
1153
1154 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1155 componentPtr = (RF_SingleComponent_t *)data;
1156 memcpy( &component, componentPtr,
1157 sizeof(RF_SingleComponent_t));
1158 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1159 return(retcode);
1160
1161 case RAIDFRAME_REBUILD_IN_PLACE:
1162
1163 if (raidPtr->Layout.map->faultsTolerated == 0) {
1164 /* Can't do this on a RAID 0!! */
1165 return(EINVAL);
1166 }
1167
1168 if (raidPtr->recon_in_progress == 1) {
1169 /* a reconstruct is already in progress! */
1170 return(EINVAL);
1171 }
1172
1173 componentPtr = (RF_SingleComponent_t *) data;
1174 memcpy( &component, componentPtr,
1175 sizeof(RF_SingleComponent_t));
1176 component.row = 0; /* we don't support any more */
1177 column = component.column;
1178
1179 if ((column < 0) || (column >= raidPtr->numCol)) {
1180 return(EINVAL);
1181 }
1182
1183 RF_LOCK_MUTEX(raidPtr->mutex);
1184 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1185 (raidPtr->numFailures > 0)) {
1186 /* XXX 0 above shouldn't be constant!!! */
1187 /* some component other than this has failed.
1188 Let's not make things worse than they already
1189 are... */
1190 printf("raid%d: Unable to reconstruct to disk at:\n",
1191 raidPtr->raidid);
1192 printf("raid%d: Col: %d Too many failures.\n",
1193 raidPtr->raidid, column);
1194 RF_UNLOCK_MUTEX(raidPtr->mutex);
1195 return (EINVAL);
1196 }
1197 if (raidPtr->Disks[column].status ==
1198 rf_ds_reconstructing) {
1199 printf("raid%d: Unable to reconstruct to disk at:\n",
1200 raidPtr->raidid);
1201 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1202
1203 RF_UNLOCK_MUTEX(raidPtr->mutex);
1204 return (EINVAL);
1205 }
1206 if (raidPtr->Disks[column].status == rf_ds_spared) {
1207 RF_UNLOCK_MUTEX(raidPtr->mutex);
1208 return (EINVAL);
1209 }
1210 RF_UNLOCK_MUTEX(raidPtr->mutex);
1211
1212 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1213 if (rrcopy == NULL)
1214 return(ENOMEM);
1215
1216 rrcopy->raidPtr = (void *) raidPtr;
1217 rrcopy->col = column;
1218
1219 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1220 rf_ReconstructInPlaceThread,
1221 rrcopy,"raid_reconip");
1222 return(retcode);
1223
1224 case RAIDFRAME_GET_INFO:
1225 if (!raidPtr->valid)
1226 return (ENODEV);
1227 ucfgp = (RF_DeviceConfig_t **) data;
1228 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1229 (RF_DeviceConfig_t *));
1230 if (d_cfg == NULL)
1231 return (ENOMEM);
1232 d_cfg->rows = 1; /* there is only 1 row now */
1233 d_cfg->cols = raidPtr->numCol;
1234 d_cfg->ndevs = raidPtr->numCol;
1235 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1236 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1237 return (ENOMEM);
1238 }
1239 d_cfg->nspares = raidPtr->numSpare;
1240 if (d_cfg->nspares >= RF_MAX_DISKS) {
1241 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1242 return (ENOMEM);
1243 }
1244 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1245 d = 0;
1246 for (j = 0; j < d_cfg->cols; j++) {
1247 d_cfg->devs[d] = raidPtr->Disks[j];
1248 d++;
1249 }
1250 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1251 d_cfg->spares[i] = raidPtr->Disks[j];
1252 }
1253 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1254 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1255
1256 return (retcode);
1257
1258 case RAIDFRAME_CHECK_PARITY:
1259 *(int *) data = raidPtr->parity_good;
1260 return (0);
1261
1262 case RAIDFRAME_RESET_ACCTOTALS:
1263 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1264 return (0);
1265
1266 case RAIDFRAME_GET_ACCTOTALS:
1267 totals = (RF_AccTotals_t *) data;
1268 *totals = raidPtr->acc_totals;
1269 return (0);
1270
1271 case RAIDFRAME_KEEP_ACCTOTALS:
1272 raidPtr->keep_acc_totals = *(int *)data;
1273 return (0);
1274
1275 case RAIDFRAME_GET_SIZE:
1276 *(int *) data = raidPtr->totalSectors;
1277 return (0);
1278
1279 /* fail a disk & optionally start reconstruction */
1280 case RAIDFRAME_FAIL_DISK:
1281
1282 if (raidPtr->Layout.map->faultsTolerated == 0) {
1283 /* Can't do this on a RAID 0!! */
1284 return(EINVAL);
1285 }
1286
1287 rr = (struct rf_recon_req *) data;
1288 rr->row = 0;
1289 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1290 return (EINVAL);
1291
1292
1293 RF_LOCK_MUTEX(raidPtr->mutex);
1294 if (raidPtr->status == rf_rs_reconstructing) {
1295 /* you can't fail a disk while we're reconstructing! */
1296 /* XXX wrong for RAID6 */
1297 RF_UNLOCK_MUTEX(raidPtr->mutex);
1298 return (EINVAL);
1299 }
1300 if ((raidPtr->Disks[rr->col].status ==
1301 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1302 /* some other component has failed. Let's not make
1303 things worse. XXX wrong for RAID6 */
1304 RF_UNLOCK_MUTEX(raidPtr->mutex);
1305 return (EINVAL);
1306 }
1307 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1308 /* Can't fail a spared disk! */
1309 RF_UNLOCK_MUTEX(raidPtr->mutex);
1310 return (EINVAL);
1311 }
1312 RF_UNLOCK_MUTEX(raidPtr->mutex);
1313
1314 /* make a copy of the recon request so that we don't rely on
1315 * the user's buffer */
1316 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1317 if (rrcopy == NULL)
1318 return(ENOMEM);
1319 memcpy(rrcopy, rr, sizeof(*rr));
1320 rrcopy->raidPtr = (void *) raidPtr;
1321
1322 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1323 rf_ReconThread,
1324 rrcopy,"raid_recon");
1325 return (0);
1326
1327 /* invoke a copyback operation after recon on whatever disk
1328 * needs it, if any */
1329 case RAIDFRAME_COPYBACK:
1330
1331 if (raidPtr->Layout.map->faultsTolerated == 0) {
1332 /* This makes no sense on a RAID 0!! */
1333 return(EINVAL);
1334 }
1335
1336 if (raidPtr->copyback_in_progress == 1) {
1337 /* Copyback is already in progress! */
1338 return(EINVAL);
1339 }
1340
1341 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1342 rf_CopybackThread,
1343 raidPtr,"raid_copyback");
1344 return (retcode);
1345
1346 /* return the percentage completion of reconstruction */
1347 case RAIDFRAME_CHECK_RECON_STATUS:
1348 if (raidPtr->Layout.map->faultsTolerated == 0) {
1349 /* This makes no sense on a RAID 0, so tell the
1350 user it's done. */
1351 *(int *) data = 100;
1352 return(0);
1353 }
1354 if (raidPtr->status != rf_rs_reconstructing)
1355 *(int *) data = 100;
1356 else {
1357 if (raidPtr->reconControl->numRUsTotal > 0) {
1358 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1359 } else {
1360 *(int *) data = 0;
1361 }
1362 }
1363 return (0);
1364 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1365 progressInfoPtr = (RF_ProgressInfo_t **) data;
1366 if (raidPtr->status != rf_rs_reconstructing) {
1367 progressInfo.remaining = 0;
1368 progressInfo.completed = 100;
1369 progressInfo.total = 100;
1370 } else {
1371 progressInfo.total =
1372 raidPtr->reconControl->numRUsTotal;
1373 progressInfo.completed =
1374 raidPtr->reconControl->numRUsComplete;
1375 progressInfo.remaining = progressInfo.total -
1376 progressInfo.completed;
1377 }
1378 retcode = copyout(&progressInfo, *progressInfoPtr,
1379 sizeof(RF_ProgressInfo_t));
1380 return (retcode);
1381
1382 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1383 if (raidPtr->Layout.map->faultsTolerated == 0) {
1384 /* This makes no sense on a RAID 0, so tell the
1385 user it's done. */
1386 *(int *) data = 100;
1387 return(0);
1388 }
1389 if (raidPtr->parity_rewrite_in_progress == 1) {
1390 *(int *) data = 100 *
1391 raidPtr->parity_rewrite_stripes_done /
1392 raidPtr->Layout.numStripe;
1393 } else {
1394 *(int *) data = 100;
1395 }
1396 return (0);
1397
1398 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1399 progressInfoPtr = (RF_ProgressInfo_t **) data;
1400 if (raidPtr->parity_rewrite_in_progress == 1) {
1401 progressInfo.total = raidPtr->Layout.numStripe;
1402 progressInfo.completed =
1403 raidPtr->parity_rewrite_stripes_done;
1404 progressInfo.remaining = progressInfo.total -
1405 progressInfo.completed;
1406 } else {
1407 progressInfo.remaining = 0;
1408 progressInfo.completed = 100;
1409 progressInfo.total = 100;
1410 }
1411 retcode = copyout(&progressInfo, *progressInfoPtr,
1412 sizeof(RF_ProgressInfo_t));
1413 return (retcode);
1414
1415 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1416 if (raidPtr->Layout.map->faultsTolerated == 0) {
1417 /* This makes no sense on a RAID 0 */
1418 *(int *) data = 100;
1419 return(0);
1420 }
1421 if (raidPtr->copyback_in_progress == 1) {
1422 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1423 raidPtr->Layout.numStripe;
1424 } else {
1425 *(int *) data = 100;
1426 }
1427 return (0);
1428
1429 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1430 progressInfoPtr = (RF_ProgressInfo_t **) data;
1431 if (raidPtr->copyback_in_progress == 1) {
1432 progressInfo.total = raidPtr->Layout.numStripe;
1433 progressInfo.completed =
1434 raidPtr->copyback_stripes_done;
1435 progressInfo.remaining = progressInfo.total -
1436 progressInfo.completed;
1437 } else {
1438 progressInfo.remaining = 0;
1439 progressInfo.completed = 100;
1440 progressInfo.total = 100;
1441 }
1442 retcode = copyout(&progressInfo, *progressInfoPtr,
1443 sizeof(RF_ProgressInfo_t));
1444 return (retcode);
1445
1446 /* the sparetable daemon calls this to wait for the kernel to
1447 * need a spare table. this ioctl does not return until a
1448 * spare table is needed. XXX -- calling mpsleep here in the
1449 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1450 * -- I should either compute the spare table in the kernel,
1451 * or have a different -- XXX XXX -- interface (a different
1452 * character device) for delivering the table -- XXX */
1453 #if 0
1454 case RAIDFRAME_SPARET_WAIT:
1455 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1456 while (!rf_sparet_wait_queue)
1457 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1458 waitreq = rf_sparet_wait_queue;
1459 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1460 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1461
1462 /* structure assignment */
1463 *((RF_SparetWait_t *) data) = *waitreq;
1464
1465 RF_Free(waitreq, sizeof(*waitreq));
1466 return (0);
1467
1468 /* wakes up a process waiting on SPARET_WAIT and puts an error
1469 * code in it that will cause the daemon to exit */
1470 case RAIDFRAME_ABORT_SPARET_WAIT:
1471 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1472 waitreq->fcol = -1;
1473 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1474 waitreq->next = rf_sparet_wait_queue;
1475 rf_sparet_wait_queue = waitreq;
1476 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1477 wakeup(&rf_sparet_wait_queue);
1478 return (0);
1479
1480 /* used by the spare table daemon to deliver a spare table
1481 * into the kernel */
1482 case RAIDFRAME_SEND_SPARET:
1483
1484 /* install the spare table */
1485 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1486
1487 /* respond to the requestor. the return status of the spare
1488 * table installation is passed in the "fcol" field */
1489 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1490 waitreq->fcol = retcode;
1491 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1492 waitreq->next = rf_sparet_resp_queue;
1493 rf_sparet_resp_queue = waitreq;
1494 wakeup(&rf_sparet_resp_queue);
1495 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1496
1497 return (retcode);
1498 #endif
1499
1500 default:
1501 break; /* fall through to the os-specific code below */
1502
1503 }
1504
1505 if (!raidPtr->valid)
1506 return (EINVAL);
1507
1508 /*
1509 * Add support for "regular" device ioctls here.
1510 */
1511
1512 switch (cmd) {
1513 case DIOCGDINFO:
1514 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1515 break;
1516 #ifdef __HAVE_OLD_DISKLABEL
1517 case ODIOCGDINFO:
1518 newlabel = *(rs->sc_dkdev.dk_label);
1519 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1520 return ENOTTY;
1521 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1522 break;
1523 #endif
1524
1525 case DIOCGPART:
1526 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1527 ((struct partinfo *) data)->part =
1528 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1529 break;
1530
1531 case DIOCWDINFO:
1532 case DIOCSDINFO:
1533 #ifdef __HAVE_OLD_DISKLABEL
1534 case ODIOCWDINFO:
1535 case ODIOCSDINFO:
1536 #endif
1537 {
1538 struct disklabel *lp;
1539 #ifdef __HAVE_OLD_DISKLABEL
1540 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1541 memset(&newlabel, 0, sizeof newlabel);
1542 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1543 lp = &newlabel;
1544 } else
1545 #endif
1546 lp = (struct disklabel *)data;
1547
1548 if ((error = raidlock(rs)) != 0)
1549 return (error);
1550
1551 rs->sc_flags |= RAIDF_LABELLING;
1552
1553 error = setdisklabel(rs->sc_dkdev.dk_label,
1554 lp, 0, rs->sc_dkdev.dk_cpulabel);
1555 if (error == 0) {
1556 if (cmd == DIOCWDINFO
1557 #ifdef __HAVE_OLD_DISKLABEL
1558 || cmd == ODIOCWDINFO
1559 #endif
1560 )
1561 error = writedisklabel(RAIDLABELDEV(dev),
1562 raidstrategy, rs->sc_dkdev.dk_label,
1563 rs->sc_dkdev.dk_cpulabel);
1564 }
1565 rs->sc_flags &= ~RAIDF_LABELLING;
1566
1567 raidunlock(rs);
1568
1569 if (error)
1570 return (error);
1571 break;
1572 }
1573
1574 case DIOCWLABEL:
1575 if (*(int *) data != 0)
1576 rs->sc_flags |= RAIDF_WLABEL;
1577 else
1578 rs->sc_flags &= ~RAIDF_WLABEL;
1579 break;
1580
1581 case DIOCGDEFLABEL:
1582 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1583 break;
1584
1585 #ifdef __HAVE_OLD_DISKLABEL
1586 case ODIOCGDEFLABEL:
1587 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1588 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1589 return ENOTTY;
1590 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1591 break;
1592 #endif
1593
1594 default:
1595 retcode = ENOTTY;
1596 }
1597 return (retcode);
1598
1599 }
1600
1601
1602 /* raidinit -- complete the rest of the initialization for the
1603 RAIDframe device. */
1604
1605
1606 static void
1607 raidinit(RF_Raid_t *raidPtr)
1608 {
1609 struct raid_softc *rs;
1610 int unit;
1611
1612 unit = raidPtr->raidid;
1613
1614 rs = &raid_softc[unit];
1615
1616 /* XXX should check return code first... */
1617 rs->sc_flags |= RAIDF_INITED;
1618
1619 /* XXX doesn't check bounds. */
1620 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1621
1622 rs->sc_dkdev.dk_name = rs->sc_xname;
1623
1624 /* disk_attach actually creates space for the CPU disklabel, among
1625 * other things, so it's critical to call this *BEFORE* we try putzing
1626 * with disklabels. */
1627
1628 pseudo_disk_attach(&rs->sc_dkdev);
1629
1630 /* XXX There may be a weird interaction here between this, and
1631 * protectedSectors, as used in RAIDframe. */
1632
1633 rs->sc_size = raidPtr->totalSectors;
1634 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * rf_GetSpareTableFromDaemon -- ask the spare-table daemon for a table
 * and wait for its reply.
 *
 * The request is pushed onto rf_sparet_wait_queue and the daemon is woken.
 * The caller then sleeps until an entry shows up on rf_sparet_resp_queue,
 * removes the first entry, and returns that entry's "fcol" field.  The
 * response entry is freed here; it is not the same object as the request
 * that was passed in.
 *
 * XXX The entries in the queues should be tagged with the raidPtr so
 * that, in the extremely rare case that two recons happen at once, we
 * know for which device we are requesting a spare table.
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* Post the request and poke the daemon. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): the historical comment here read "mpsleep unlocks
	 * the mutex", but the call is tsleep() and it is not handed the
	 * mutex -- confirm whether the mutex stays held across the sleep.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Pop the first response off the response queue. */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	RF_Free(resp, sizeof(*resp));	/* this is not the same req as we
					 * alloc'd */
	return (status);
}
#endif
1670
1671 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1672 * bp & passes it down.
1673 * any calls originating in the kernel must use non-blocking I/O
1674 * do some extra sanity checking to return "appropriate" error values for
1675 * certain conditions (to make some standard utilities work)
1676 *
1677 * Formerly known as: rf_DoAccessKernel
1678 */
1679 void
1680 raidstart(RF_Raid_t *raidPtr)
1681 {
1682 RF_SectorCount_t num_blocks, pb, sum;
1683 RF_RaidAddr_t raid_addr;
1684 struct partition *pp;
1685 daddr_t blocknum;
1686 int unit;
1687 struct raid_softc *rs;
1688 int do_async;
1689 struct buf *bp;
1690 int rc;
1691
1692 unit = raidPtr->raidid;
1693 rs = &raid_softc[unit];
1694
1695 /* quick check to see if anything has died recently */
1696 RF_LOCK_MUTEX(raidPtr->mutex);
1697 if (raidPtr->numNewFailures > 0) {
1698 RF_UNLOCK_MUTEX(raidPtr->mutex);
1699 rf_update_component_labels(raidPtr,
1700 RF_NORMAL_COMPONENT_UPDATE);
1701 RF_LOCK_MUTEX(raidPtr->mutex);
1702 raidPtr->numNewFailures--;
1703 }
1704
1705 /* Check to see if we're at the limit... */
1706 while (raidPtr->openings > 0) {
1707 RF_UNLOCK_MUTEX(raidPtr->mutex);
1708
1709 /* get the next item, if any, from the queue */
1710 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1711 /* nothing more to do */
1712 return;
1713 }
1714
1715 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1716 * partition.. Need to make it absolute to the underlying
1717 * device.. */
1718
1719 blocknum = bp->b_blkno;
1720 if (DISKPART(bp->b_dev) != RAW_PART) {
1721 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1722 blocknum += pp->p_offset;
1723 }
1724
1725 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1726 (int) blocknum));
1727
1728 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1729 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1730
1731 /* *THIS* is where we adjust what block we're going to...
1732 * but DO NOT TOUCH bp->b_blkno!!! */
1733 raid_addr = blocknum;
1734
1735 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1736 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1737 sum = raid_addr + num_blocks + pb;
1738 if (1 || rf_debugKernelAccess) {
1739 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1740 (int) raid_addr, (int) sum, (int) num_blocks,
1741 (int) pb, (int) bp->b_resid));
1742 }
1743 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1744 || (sum < num_blocks) || (sum < pb)) {
1745 bp->b_error = ENOSPC;
1746 bp->b_flags |= B_ERROR;
1747 bp->b_resid = bp->b_bcount;
1748 biodone(bp);
1749 RF_LOCK_MUTEX(raidPtr->mutex);
1750 continue;
1751 }
1752 /*
1753 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1754 */
1755
1756 if (bp->b_bcount & raidPtr->sectorMask) {
1757 bp->b_error = EINVAL;
1758 bp->b_flags |= B_ERROR;
1759 bp->b_resid = bp->b_bcount;
1760 biodone(bp);
1761 RF_LOCK_MUTEX(raidPtr->mutex);
1762 continue;
1763
1764 }
1765 db1_printf(("Calling DoAccess..\n"));
1766
1767
1768 RF_LOCK_MUTEX(raidPtr->mutex);
1769 raidPtr->openings--;
1770 RF_UNLOCK_MUTEX(raidPtr->mutex);
1771
1772 /*
1773 * Everything is async.
1774 */
1775 do_async = 1;
1776
1777 disk_busy(&rs->sc_dkdev);
1778
1779 /* XXX we're still at splbio() here... do we *really*
1780 need to be? */
1781
1782 /* don't ever condition on bp->b_flags & B_WRITE.
1783 * always condition on B_READ instead */
1784
1785 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1786 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1787 do_async, raid_addr, num_blocks,
1788 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1789
1790 if (rc) {
1791 bp->b_error = rc;
1792 bp->b_flags |= B_ERROR;
1793 bp->b_resid = bp->b_bcount;
1794 biodone(bp);
1795 /* continue loop */
1796 }
1797
1798 RF_LOCK_MUTEX(raidPtr->mutex);
1799 }
1800 RF_UNLOCK_MUTEX(raidPtr->mutex);
1801 }
1802
1803
1804
1805
1806 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1807
1808 int
1809 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1810 {
1811 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1812 struct buf *bp;
1813
1814 req->queue = queue;
1815
1816 #if DIAGNOSTIC
1817 if (queue->raidPtr->raidid >= numraid) {
1818 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1819 numraid);
1820 panic("Invalid Unit number in rf_DispatchKernelIO");
1821 }
1822 #endif
1823
1824 bp = req->bp;
1825
1826 switch (req->type) {
1827 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1828 /* XXX need to do something extra here.. */
1829 /* I'm leaving this in, as I've never actually seen it used,
1830 * and I'd like folks to report it... GO */
1831 printf(("WAKEUP CALLED\n"));
1832 queue->numOutstanding++;
1833
1834 bp->b_flags = 0;
1835 bp->b_private = req;
1836
1837 KernelWakeupFunc(bp);
1838 break;
1839
1840 case RF_IO_TYPE_READ:
1841 case RF_IO_TYPE_WRITE:
1842 #if RF_ACC_TRACE > 0
1843 if (req->tracerec) {
1844 RF_ETIMER_START(req->tracerec->timer);
1845 }
1846 #endif
1847 InitBP(bp, queue->rf_cinfo->ci_vp,
1848 op, queue->rf_cinfo->ci_dev,
1849 req->sectorOffset, req->numSector,
1850 req->buf, KernelWakeupFunc, (void *) req,
1851 queue->raidPtr->logBytesPerSector, req->b_proc);
1852
1853 if (rf_debugKernelAccess) {
1854 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1855 (long) bp->b_blkno));
1856 }
1857 queue->numOutstanding++;
1858 queue->last_deq_sector = req->sectorOffset;
1859 /* acc wouldn't have been let in if there were any pending
1860 * reqs at any other priority */
1861 queue->curPriority = req->priority;
1862
1863 db1_printf(("Going for %c to unit %d col %d\n",
1864 req->type, queue->raidPtr->raidid,
1865 queue->col));
1866 db1_printf(("sector %d count %d (%d bytes) %d\n",
1867 (int) req->sectorOffset, (int) req->numSector,
1868 (int) (req->numSector <<
1869 queue->raidPtr->logBytesPerSector),
1870 (int) queue->raidPtr->logBytesPerSector));
1871 VOP_STRATEGY(bp->b_vp, bp);
1872
1873 break;
1874
1875 default:
1876 panic("bad req->type in rf_DispatchKernelIO");
1877 }
1878 db1_printf(("Exiting from DispatchKernelIO\n"));
1879
1880 return (0);
1881 }
1882 /* this is the callback function associated with a I/O invoked from
1883 kernel code.
1884 */
1885 static void
1886 KernelWakeupFunc(struct buf *bp)
1887 {
1888 RF_DiskQueueData_t *req = NULL;
1889 RF_DiskQueue_t *queue;
1890 int s;
1891
1892 s = splbio();
1893 db1_printf(("recovering the request queue:\n"));
1894 req = bp->b_private;
1895
1896 queue = (RF_DiskQueue_t *) req->queue;
1897
1898 #if RF_ACC_TRACE > 0
1899 if (req->tracerec) {
1900 RF_ETIMER_STOP(req->tracerec->timer);
1901 RF_ETIMER_EVAL(req->tracerec->timer);
1902 RF_LOCK_MUTEX(rf_tracing_mutex);
1903 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1904 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1905 req->tracerec->num_phys_ios++;
1906 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1907 }
1908 #endif
1909
1910 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1911 * ballistic, and mark the component as hosed... */
1912
1913 if (bp->b_flags & B_ERROR) {
1914 /* Mark the disk as dead */
1915 /* but only mark it once... */
1916 /* and only if it wouldn't leave this RAID set
1917 completely broken */
1918 if (((queue->raidPtr->Disks[queue->col].status ==
1919 rf_ds_optimal) ||
1920 (queue->raidPtr->Disks[queue->col].status ==
1921 rf_ds_used_spare)) &&
1922 (queue->raidPtr->numFailures <
1923 queue->raidPtr->Layout.map->faultsTolerated)) {
1924 printf("raid%d: IO Error. Marking %s as failed.\n",
1925 queue->raidPtr->raidid,
1926 queue->raidPtr->Disks[queue->col].devname);
1927 queue->raidPtr->Disks[queue->col].status =
1928 rf_ds_failed;
1929 queue->raidPtr->status = rf_rs_degraded;
1930 queue->raidPtr->numFailures++;
1931 queue->raidPtr->numNewFailures++;
1932 } else { /* Disk is already dead... */
1933 /* printf("Disk already marked as dead!\n"); */
1934 }
1935
1936 }
1937
1938 /* Fill in the error value */
1939
1940 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1941
1942 simple_lock(&queue->raidPtr->iodone_lock);
1943
1944 /* Drop this one on the "finished" queue... */
1945 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1946
1947 /* Let the raidio thread know there is work to be done. */
1948 wakeup(&(queue->raidPtr->iodone));
1949
1950 simple_unlock(&queue->raidPtr->iodone_lock);
1951
1952 splx(s);
1953 }
1954
1955
1956
1957 /*
1958 * initialize a buf structure for doing an I/O in the kernel.
1959 */
1960 static void
1961 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1962 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1963 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
1964 struct proc *b_proc)
1965 {
1966 /* bp->b_flags = B_PHYS | rw_flag; */
1967 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1968 bp->b_bcount = numSect << logBytesPerSector;
1969 bp->b_bufsize = bp->b_bcount;
1970 bp->b_error = 0;
1971 bp->b_dev = dev;
1972 bp->b_data = bf;
1973 bp->b_blkno = startSect;
1974 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1975 if (bp->b_bcount == 0) {
1976 panic("bp->b_bcount is zero in InitBP!!");
1977 }
1978 bp->b_proc = b_proc;
1979 bp->b_iodone = cbFunc;
1980 bp->b_private = cbArg;
1981 bp->b_vp = b_vp;
1982 if ((bp->b_flags & B_READ) == 0) {
1983 bp->b_vp->v_numoutput++;
1984 }
1985
1986 }
1987
1988 static void
1989 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1990 struct disklabel *lp)
1991 {
1992 memset(lp, 0, sizeof(*lp));
1993
1994 /* fabricate a label... */
1995 lp->d_secperunit = raidPtr->totalSectors;
1996 lp->d_secsize = raidPtr->bytesPerSector;
1997 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1998 lp->d_ntracks = 4 * raidPtr->numCol;
1999 lp->d_ncylinders = raidPtr->totalSectors /
2000 (lp->d_nsectors * lp->d_ntracks);
2001 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2002
2003 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2004 lp->d_type = DTYPE_RAID;
2005 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2006 lp->d_rpm = 3600;
2007 lp->d_interleave = 1;
2008 lp->d_flags = 0;
2009
2010 lp->d_partitions[RAW_PART].p_offset = 0;
2011 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2012 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2013 lp->d_npartitions = RAW_PART + 1;
2014
2015 lp->d_magic = DISKMAGIC;
2016 lp->d_magic2 = DISKMAGIC;
2017 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2018
2019 }
2020 /*
2021 * Read the disklabel from the raid device. If one is not present, fake one
2022 * up.
2023 */
2024 static void
2025 raidgetdisklabel(dev_t dev)
2026 {
2027 int unit = raidunit(dev);
2028 struct raid_softc *rs = &raid_softc[unit];
2029 const char *errstring;
2030 struct disklabel *lp = rs->sc_dkdev.dk_label;
2031 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2032 RF_Raid_t *raidPtr;
2033
2034 db1_printf(("Getting the disklabel...\n"));
2035
2036 memset(clp, 0, sizeof(*clp));
2037
2038 raidPtr = raidPtrs[unit];
2039
2040 raidgetdefaultlabel(raidPtr, rs, lp);
2041
2042 /*
2043 * Call the generic disklabel extraction routine.
2044 */
2045 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2046 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2047 if (errstring)
2048 raidmakedisklabel(rs);
2049 else {
2050 int i;
2051 struct partition *pp;
2052
2053 /*
2054 * Sanity check whether the found disklabel is valid.
2055 *
2056 * This is necessary since total size of the raid device
2057 * may vary when an interleave is changed even though exactly
2058 * same components are used, and old disklabel may used
2059 * if that is found.
2060 */
2061 if (lp->d_secperunit != rs->sc_size)
2062 printf("raid%d: WARNING: %s: "
2063 "total sector size in disklabel (%d) != "
2064 "the size of raid (%ld)\n", unit, rs->sc_xname,
2065 lp->d_secperunit, (long) rs->sc_size);
2066 for (i = 0; i < lp->d_npartitions; i++) {
2067 pp = &lp->d_partitions[i];
2068 if (pp->p_offset + pp->p_size > rs->sc_size)
2069 printf("raid%d: WARNING: %s: end of partition `%c' "
2070 "exceeds the size of raid (%ld)\n",
2071 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2072 }
2073 }
2074
2075 }
2076 /*
2077 * Take care of things one might want to take care of in the event
2078 * that a disklabel isn't present.
2079 */
2080 static void
2081 raidmakedisklabel(struct raid_softc *rs)
2082 {
2083 struct disklabel *lp = rs->sc_dkdev.dk_label;
2084 db1_printf(("Making a label..\n"));
2085
2086 /*
2087 * For historical reasons, if there's no disklabel present
2088 * the raw partition must be marked FS_BSDFFS.
2089 */
2090
2091 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2092
2093 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2094
2095 lp->d_checksum = dkcksum(lp);
2096 }
2097 /*
2098 * Lookup the provided name in the filesystem. If the file exists,
2099 * is a valid block device, and isn't being used by anyone else,
2100 * set *vpp to the file's vnode.
2101 * You'll find the original of this in ccd.c
2102 */
2103 int
2104 raidlookup(char *path, struct lwp *l, struct vnode **vpp)
2105 {
2106 struct nameidata nd;
2107 struct vnode *vp;
2108 struct vattr va;
2109 int error;
2110
2111 if (l == NULL)
2112 return(ESRCH); /* Is ESRCH the best choice? */
2113
2114 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l);
2115 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2116 return (error);
2117 }
2118 vp = nd.ni_vp;
2119 if (vp->v_usecount > 1) {
2120 VOP_UNLOCK(vp, 0);
2121 (void) vn_close(vp, FREAD | FWRITE, l->l_cred, l);
2122 return (EBUSY);
2123 }
2124 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0) {
2125 VOP_UNLOCK(vp, 0);
2126 (void) vn_close(vp, FREAD | FWRITE, l->l_cred, l);
2127 return (error);
2128 }
2129 /* XXX: eventually we should handle VREG, too. */
2130 if (va.va_type != VBLK) {
2131 VOP_UNLOCK(vp, 0);
2132 (void) vn_close(vp, FREAD | FWRITE, l->l_cred, l);
2133 return (ENOTBLK);
2134 }
2135 VOP_UNLOCK(vp, 0);
2136 *vpp = vp;
2137 return (0);
2138 }
2139 /*
2140 * Wait interruptibly for an exclusive lock.
2141 *
2142 * XXX
2143 * Several drivers do this; it should be abstracted and made MP-safe.
2144 * (Hmm... where have we seen this warning before :-> GO )
2145 */
2146 static int
2147 raidlock(struct raid_softc *rs)
2148 {
2149 int error;
2150
2151 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2152 rs->sc_flags |= RAIDF_WANTED;
2153 if ((error =
2154 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2155 return (error);
2156 }
2157 rs->sc_flags |= RAIDF_LOCKED;
2158 return (0);
2159 }
2160 /*
2161 * Unlock and wake up any waiters.
2162 */
2163 static void
2164 raidunlock(struct raid_softc *rs)
2165 {
2166
2167 rs->sc_flags &= ~RAIDF_LOCKED;
2168 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2169 rs->sc_flags &= ~RAIDF_WANTED;
2170 wakeup(rs);
2171 }
2172 }
2173
2174
2175 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2176 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2177
2178 int
2179 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2180 {
2181 RF_ComponentLabel_t clabel;
2182 raidread_component_label(dev, b_vp, &clabel);
2183 clabel.mod_counter = mod_counter;
2184 clabel.clean = RF_RAID_CLEAN;
2185 raidwrite_component_label(dev, b_vp, &clabel);
2186 return(0);
2187 }
2188
2189
2190 int
2191 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2192 {
2193 RF_ComponentLabel_t clabel;
2194 raidread_component_label(dev, b_vp, &clabel);
2195 clabel.mod_counter = mod_counter;
2196 clabel.clean = RF_RAID_DIRTY;
2197 raidwrite_component_label(dev, b_vp, &clabel);
2198 return(0);
2199 }
2200
2201 /* ARGSUSED */
2202 int
2203 raidread_component_label(dev_t dev, struct vnode *b_vp,
2204 RF_ComponentLabel_t *clabel)
2205 {
2206 struct buf *bp;
2207 const struct bdevsw *bdev;
2208 int error;
2209
2210 /* XXX should probably ensure that we don't try to do this if
2211 someone has changed rf_protected_sectors. */
2212
2213 if (b_vp == NULL) {
2214 /* For whatever reason, this component is not valid.
2215 Don't try to read a component label from it. */
2216 return(EINVAL);
2217 }
2218
2219 /* get a block of the appropriate size... */
2220 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2221 bp->b_dev = dev;
2222
2223 /* get our ducks in a row for the read */
2224 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2225 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2226 bp->b_flags |= B_READ;
2227 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2228
2229 bdev = bdevsw_lookup(bp->b_dev);
2230 if (bdev == NULL)
2231 return (ENXIO);
2232 (*bdev->d_strategy)(bp);
2233
2234 error = biowait(bp);
2235
2236 if (!error) {
2237 memcpy(clabel, bp->b_data,
2238 sizeof(RF_ComponentLabel_t));
2239 }
2240
2241 brelse(bp);
2242 return(error);
2243 }
2244 /* ARGSUSED */
2245 int
2246 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2247 RF_ComponentLabel_t *clabel)
2248 {
2249 struct buf *bp;
2250 const struct bdevsw *bdev;
2251 int error;
2252
2253 /* get a block of the appropriate size... */
2254 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2255 bp->b_dev = dev;
2256
2257 /* get our ducks in a row for the write */
2258 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2259 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2260 bp->b_flags |= B_WRITE;
2261 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2262
2263 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2264
2265 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2266
2267 bdev = bdevsw_lookup(bp->b_dev);
2268 if (bdev == NULL)
2269 return (ENXIO);
2270 (*bdev->d_strategy)(bp);
2271 error = biowait(bp);
2272 brelse(bp);
2273 if (error) {
2274 #if 1
2275 printf("Failed to write RAID component info!\n");
2276 #endif
2277 }
2278
2279 return(error);
2280 }
2281
2282 void
2283 rf_markalldirty(RF_Raid_t *raidPtr)
2284 {
2285 RF_ComponentLabel_t clabel;
2286 int sparecol;
2287 int c;
2288 int j;
2289 int scol = -1;
2290
2291 raidPtr->mod_counter++;
2292 for (c = 0; c < raidPtr->numCol; c++) {
2293 /* we don't want to touch (at all) a disk that has
2294 failed */
2295 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2296 raidread_component_label(
2297 raidPtr->Disks[c].dev,
2298 raidPtr->raid_cinfo[c].ci_vp,
2299 &clabel);
2300 if (clabel.status == rf_ds_spared) {
2301 /* XXX do something special...
2302 but whatever you do, don't
2303 try to access it!! */
2304 } else {
2305 raidmarkdirty(
2306 raidPtr->Disks[c].dev,
2307 raidPtr->raid_cinfo[c].ci_vp,
2308 raidPtr->mod_counter);
2309 }
2310 }
2311 }
2312
2313 for( c = 0; c < raidPtr->numSpare ; c++) {
2314 sparecol = raidPtr->numCol + c;
2315 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2316 /*
2317
2318 we claim this disk is "optimal" if it's
2319 rf_ds_used_spare, as that means it should be
2320 directly substitutable for the disk it replaced.
2321 We note that too...
2322
2323 */
2324
2325 for(j=0;j<raidPtr->numCol;j++) {
2326 if (raidPtr->Disks[j].spareCol == sparecol) {
2327 scol = j;
2328 break;
2329 }
2330 }
2331
2332 raidread_component_label(
2333 raidPtr->Disks[sparecol].dev,
2334 raidPtr->raid_cinfo[sparecol].ci_vp,
2335 &clabel);
2336 /* make sure status is noted */
2337
2338 raid_init_component_label(raidPtr, &clabel);
2339
2340 clabel.row = 0;
2341 clabel.column = scol;
2342 /* Note: we *don't* change status from rf_ds_used_spare
2343 to rf_ds_optimal */
2344 /* clabel.status = rf_ds_optimal; */
2345
2346 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2347 raidPtr->raid_cinfo[sparecol].ci_vp,
2348 raidPtr->mod_counter);
2349 }
2350 }
2351 }
2352
2353
2354 void
2355 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2356 {
2357 RF_ComponentLabel_t clabel;
2358 int sparecol;
2359 int c;
2360 int j;
2361 int scol;
2362
2363 scol = -1;
2364
2365 /* XXX should do extra checks to make sure things really are clean,
2366 rather than blindly setting the clean bit... */
2367
2368 raidPtr->mod_counter++;
2369
2370 for (c = 0; c < raidPtr->numCol; c++) {
2371 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2372 raidread_component_label(
2373 raidPtr->Disks[c].dev,
2374 raidPtr->raid_cinfo[c].ci_vp,
2375 &clabel);
2376 /* make sure status is noted */
2377 clabel.status = rf_ds_optimal;
2378
2379 /* bump the counter */
2380 clabel.mod_counter = raidPtr->mod_counter;
2381
2382 raidwrite_component_label(
2383 raidPtr->Disks[c].dev,
2384 raidPtr->raid_cinfo[c].ci_vp,
2385 &clabel);
2386 if (final == RF_FINAL_COMPONENT_UPDATE) {
2387 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2388 raidmarkclean(
2389 raidPtr->Disks[c].dev,
2390 raidPtr->raid_cinfo[c].ci_vp,
2391 raidPtr->mod_counter);
2392 }
2393 }
2394 }
2395 /* else we don't touch it.. */
2396 }
2397
2398 for( c = 0; c < raidPtr->numSpare ; c++) {
2399 sparecol = raidPtr->numCol + c;
2400 /* Need to ensure that the reconstruct actually completed! */
2401 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2402 /*
2403
2404 we claim this disk is "optimal" if it's
2405 rf_ds_used_spare, as that means it should be
2406 directly substitutable for the disk it replaced.
2407 We note that too...
2408
2409 */
2410
2411 for(j=0;j<raidPtr->numCol;j++) {
2412 if (raidPtr->Disks[j].spareCol == sparecol) {
2413 scol = j;
2414 break;
2415 }
2416 }
2417
2418 /* XXX shouldn't *really* need this... */
2419 raidread_component_label(
2420 raidPtr->Disks[sparecol].dev,
2421 raidPtr->raid_cinfo[sparecol].ci_vp,
2422 &clabel);
2423 /* make sure status is noted */
2424
2425 raid_init_component_label(raidPtr, &clabel);
2426
2427 clabel.mod_counter = raidPtr->mod_counter;
2428 clabel.column = scol;
2429 clabel.status = rf_ds_optimal;
2430
2431 raidwrite_component_label(
2432 raidPtr->Disks[sparecol].dev,
2433 raidPtr->raid_cinfo[sparecol].ci_vp,
2434 &clabel);
2435 if (final == RF_FINAL_COMPONENT_UPDATE) {
2436 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2437 raidmarkclean( raidPtr->Disks[sparecol].dev,
2438 raidPtr->raid_cinfo[sparecol].ci_vp,
2439 raidPtr->mod_counter);
2440 }
2441 }
2442 }
2443 }
2444 }
2445
2446 void
2447 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2448 {
2449 struct proc *p;
2450 struct lwp *l;
2451
2452 p = raidPtr->engine_thread;
2453 l = LIST_FIRST(&p->p_lwps);
2454
2455 if (vp != NULL) {
2456 if (auto_configured == 1) {
2457 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2458 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2459 vput(vp);
2460
2461 } else {
2462 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2463 }
2464 }
2465 }
2466
2467
2468 void
2469 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2470 {
2471 int r,c;
2472 struct vnode *vp;
2473 int acd;
2474
2475
2476 /* We take this opportunity to close the vnodes like we should.. */
2477
2478 for (c = 0; c < raidPtr->numCol; c++) {
2479 vp = raidPtr->raid_cinfo[c].ci_vp;
2480 acd = raidPtr->Disks[c].auto_configured;
2481 rf_close_component(raidPtr, vp, acd);
2482 raidPtr->raid_cinfo[c].ci_vp = NULL;
2483 raidPtr->Disks[c].auto_configured = 0;
2484 }
2485
2486 for (r = 0; r < raidPtr->numSpare; r++) {
2487 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2488 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2489 rf_close_component(raidPtr, vp, acd);
2490 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2491 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2492 }
2493 }
2494
2495
2496 void
2497 rf_ReconThread(struct rf_recon_req *req)
2498 {
2499 int s;
2500 RF_Raid_t *raidPtr;
2501
2502 s = splbio();
2503 raidPtr = (RF_Raid_t *) req->raidPtr;
2504 raidPtr->recon_in_progress = 1;
2505
2506 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2507 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2508
2509 RF_Free(req, sizeof(*req));
2510
2511 raidPtr->recon_in_progress = 0;
2512 splx(s);
2513
2514 /* That's all... */
2515 kthread_exit(0); /* does not return */
2516 }
2517
2518 void
2519 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2520 {
2521 int retcode;
2522 int s;
2523
2524 raidPtr->parity_rewrite_stripes_done = 0;
2525 raidPtr->parity_rewrite_in_progress = 1;
2526 s = splbio();
2527 retcode = rf_RewriteParity(raidPtr);
2528 splx(s);
2529 if (retcode) {
2530 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2531 } else {
2532 /* set the clean bit! If we shutdown correctly,
2533 the clean bit on each component label will get
2534 set */
2535 raidPtr->parity_good = RF_RAID_CLEAN;
2536 }
2537 raidPtr->parity_rewrite_in_progress = 0;
2538
2539 /* Anyone waiting for us to stop? If so, inform them... */
2540 if (raidPtr->waitShutdown) {
2541 wakeup(&raidPtr->parity_rewrite_in_progress);
2542 }
2543
2544 /* That's all... */
2545 kthread_exit(0); /* does not return */
2546 }
2547
2548
2549 void
2550 rf_CopybackThread(RF_Raid_t *raidPtr)
2551 {
2552 int s;
2553
2554 raidPtr->copyback_in_progress = 1;
2555 s = splbio();
2556 rf_CopybackReconstructedData(raidPtr);
2557 splx(s);
2558 raidPtr->copyback_in_progress = 0;
2559
2560 /* That's all... */
2561 kthread_exit(0); /* does not return */
2562 }
2563
2564
2565 void
2566 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2567 {
2568 int s;
2569 RF_Raid_t *raidPtr;
2570
2571 s = splbio();
2572 raidPtr = req->raidPtr;
2573 raidPtr->recon_in_progress = 1;
2574 rf_ReconstructInPlace(raidPtr, req->col);
2575 RF_Free(req, sizeof(*req));
2576 raidPtr->recon_in_progress = 0;
2577 splx(s);
2578
2579 /* That's all... */
2580 kthread_exit(0); /* does not return */
2581 }
2582
2583 RF_AutoConfig_t *
2584 rf_find_raid_components()
2585 {
2586 struct vnode *vp;
2587 struct disklabel label;
2588 struct device *dv;
2589 dev_t dev;
2590 int bmajor;
2591 int error;
2592 int i;
2593 int good_one;
2594 RF_ComponentLabel_t *clabel;
2595 RF_AutoConfig_t *ac_list;
2596 RF_AutoConfig_t *ac;
2597
2598
2599 /* initialize the AutoConfig list */
2600 ac_list = NULL;
2601
2602 /* we begin by trolling through *all* the devices on the system */
2603
2604 for (dv = alldevs.tqh_first; dv != NULL;
2605 dv = dv->dv_list.tqe_next) {
2606
2607 /* we are only interested in disks... */
2608 if (device_class(dv) != DV_DISK)
2609 continue;
2610
2611 /* we don't care about floppies... */
2612 if (device_is_a(dv, "fd")) {
2613 continue;
2614 }
2615
2616 /* we don't care about CD's... */
2617 if (device_is_a(dv, "cd")) {
2618 continue;
2619 }
2620
2621 /* hdfd is the Atari/Hades floppy driver */
2622 if (device_is_a(dv, "hdfd")) {
2623 continue;
2624 }
2625
2626 /* fdisa is the Atari/Milan floppy driver */
2627 if (device_is_a(dv, "fdisa")) {
2628 continue;
2629 }
2630
2631 /* need to find the device_name_to_block_device_major stuff */
2632 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2633
2634 /* get a vnode for the raw partition of this disk */
2635
2636 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
2637 if (bdevvp(dev, &vp))
2638 panic("RAID can't alloc vnode");
2639
2640 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2641
2642 if (error) {
2643 /* "Who cares." Continue looking
2644 for something that exists*/
2645 vput(vp);
2646 continue;
2647 }
2648
2649 /* Ok, the disk exists. Go get the disklabel. */
2650 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2651 if (error) {
2652 /*
2653 * XXX can't happen - open() would
2654 * have errored out (or faked up one)
2655 */
2656 if (error != ENOTTY)
2657 printf("RAIDframe: can't get label for dev "
2658 "%s (%d)\n", dv->dv_xname, error);
2659 }
2660
2661 /* don't need this any more. We'll allocate it again
2662 a little later if we really do... */
2663 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2664 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2665 vput(vp);
2666
2667 if (error)
2668 continue;
2669
2670 for (i=0; i < label.d_npartitions; i++) {
2671 /* We only support partitions marked as RAID */
2672 if (label.d_partitions[i].p_fstype != FS_RAID)
2673 continue;
2674
2675 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2676 if (bdevvp(dev, &vp))
2677 panic("RAID can't alloc vnode");
2678
2679 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2680 if (error) {
2681 /* Whatever... */
2682 vput(vp);
2683 continue;
2684 }
2685
2686 good_one = 0;
2687
2688 clabel = (RF_ComponentLabel_t *)
2689 malloc(sizeof(RF_ComponentLabel_t),
2690 M_RAIDFRAME, M_NOWAIT);
2691 if (clabel == NULL) {
2692 while(ac_list) {
2693 ac = ac_list;
2694 if (ac->clabel)
2695 free(ac->clabel, M_RAIDFRAME);
2696 ac_list = ac_list->next;
2697 free(ac, M_RAIDFRAME);
2698 };
2699 printf("RAID auto config: out of memory!\n");
2700 return(NULL); /* XXX probably should panic? */
2701 }
2702
2703 if (!raidread_component_label(dev, vp, clabel)) {
2704 /* Got the label. Does it look reasonable? */
2705 if (rf_reasonable_label(clabel) &&
2706 (clabel->partitionSize <=
2707 label.d_partitions[i].p_size)) {
2708 #if DEBUG
2709 printf("Component on: %s%c: %d\n",
2710 dv->dv_xname, 'a'+i,
2711 label.d_partitions[i].p_size);
2712 rf_print_component_label(clabel);
2713 #endif
2714 /* if it's reasonable, add it,
2715 else ignore it. */
2716 ac = (RF_AutoConfig_t *)
2717 malloc(sizeof(RF_AutoConfig_t),
2718 M_RAIDFRAME,
2719 M_NOWAIT);
2720 if (ac == NULL) {
2721 /* XXX should panic?? */
2722 while(ac_list) {
2723 ac = ac_list;
2724 if (ac->clabel)
2725 free(ac->clabel,
2726 M_RAIDFRAME);
2727 ac_list = ac_list->next;
2728 free(ac, M_RAIDFRAME);
2729 }
2730 free(clabel, M_RAIDFRAME);
2731 return(NULL);
2732 }
2733
2734 snprintf(ac->devname,
2735 sizeof(ac->devname), "%s%c",
2736 dv->dv_xname, 'a'+i);
2737 ac->dev = dev;
2738 ac->vp = vp;
2739 ac->clabel = clabel;
2740 ac->next = ac_list;
2741 ac_list = ac;
2742 good_one = 1;
2743 }
2744 }
2745 if (!good_one) {
2746 /* cleanup */
2747 free(clabel, M_RAIDFRAME);
2748 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2749 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2750 vput(vp);
2751 }
2752 }
2753 }
2754 return(ac_list);
2755 }
2756
2757 static int
2758 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2759 {
2760
2761 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2762 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2763 ((clabel->clean == RF_RAID_CLEAN) ||
2764 (clabel->clean == RF_RAID_DIRTY)) &&
2765 clabel->row >=0 &&
2766 clabel->column >= 0 &&
2767 clabel->num_rows > 0 &&
2768 clabel->num_columns > 0 &&
2769 clabel->row < clabel->num_rows &&
2770 clabel->column < clabel->num_columns &&
2771 clabel->blockSize > 0 &&
2772 clabel->numBlocks > 0) {
2773 /* label looks reasonable enough... */
2774 return(1);
2775 }
2776 return(0);
2777 }
2778
2779
#if DEBUG
/*
 * Dump the fields of a component label to the console (DEBUG kernels
 * only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean;
	const char *is_auto;
	const char *has_root;

	is_clean = clabel->clean ? "Yes" : "No";
	is_auto = clabel->autoconfigure ? "Yes" : "No";
	has_root = clabel->root_partition ? "Yes" : "No";

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", is_auto);
	printf(" Contains root partition: %s\n", has_root);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif
}
#endif
2807
2808 RF_ConfigSet_t *
2809 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2810 {
2811 RF_AutoConfig_t *ac;
2812 RF_ConfigSet_t *config_sets;
2813 RF_ConfigSet_t *cset;
2814 RF_AutoConfig_t *ac_next;
2815
2816
2817 config_sets = NULL;
2818
2819 /* Go through the AutoConfig list, and figure out which components
2820 belong to what sets. */
2821 ac = ac_list;
2822 while(ac!=NULL) {
2823 /* we're going to putz with ac->next, so save it here
2824 for use at the end of the loop */
2825 ac_next = ac->next;
2826
2827 if (config_sets == NULL) {
2828 /* will need at least this one... */
2829 config_sets = (RF_ConfigSet_t *)
2830 malloc(sizeof(RF_ConfigSet_t),
2831 M_RAIDFRAME, M_NOWAIT);
2832 if (config_sets == NULL) {
2833 panic("rf_create_auto_sets: No memory!");
2834 }
2835 /* this one is easy :) */
2836 config_sets->ac = ac;
2837 config_sets->next = NULL;
2838 config_sets->rootable = 0;
2839 ac->next = NULL;
2840 } else {
2841 /* which set does this component fit into? */
2842 cset = config_sets;
2843 while(cset!=NULL) {
2844 if (rf_does_it_fit(cset, ac)) {
2845 /* looks like it matches... */
2846 ac->next = cset->ac;
2847 cset->ac = ac;
2848 break;
2849 }
2850 cset = cset->next;
2851 }
2852 if (cset==NULL) {
2853 /* didn't find a match above... new set..*/
2854 cset = (RF_ConfigSet_t *)
2855 malloc(sizeof(RF_ConfigSet_t),
2856 M_RAIDFRAME, M_NOWAIT);
2857 if (cset == NULL) {
2858 panic("rf_create_auto_sets: No memory!");
2859 }
2860 cset->ac = ac;
2861 ac->next = NULL;
2862 cset->next = config_sets;
2863 cset->rootable = 0;
2864 config_sets = cset;
2865 }
2866 }
2867 ac = ac_next;
2868 }
2869
2870
2871 return(config_sets);
2872 }
2873
2874 static int
2875 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2876 {
2877 RF_ComponentLabel_t *clabel1, *clabel2;
2878
2879 /* If this one matches the *first* one in the set, that's good
2880 enough, since the other members of the set would have been
2881 through here too... */
2882 /* note that we are not checking partitionSize here..
2883
2884 Note that we are also not checking the mod_counters here.
2885 If everything else matches execpt the mod_counter, that's
2886 good enough for this test. We will deal with the mod_counters
2887 a little later in the autoconfiguration process.
2888
2889 (clabel1->mod_counter == clabel2->mod_counter) &&
2890
2891 The reason we don't check for this is that failed disks
2892 will have lower modification counts. If those disks are
2893 not added to the set they used to belong to, then they will
2894 form their own set, which may result in 2 different sets,
2895 for example, competing to be configured at raid0, and
2896 perhaps competing to be the root filesystem set. If the
2897 wrong ones get configured, or both attempt to become /,
2898 weird behaviour and or serious lossage will occur. Thus we
2899 need to bring them into the fold here, and kick them out at
2900 a later point.
2901
2902 */
2903
2904 clabel1 = cset->ac->clabel;
2905 clabel2 = ac->clabel;
2906 if ((clabel1->version == clabel2->version) &&
2907 (clabel1->serial_number == clabel2->serial_number) &&
2908 (clabel1->num_rows == clabel2->num_rows) &&
2909 (clabel1->num_columns == clabel2->num_columns) &&
2910 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2911 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2912 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2913 (clabel1->parityConfig == clabel2->parityConfig) &&
2914 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2915 (clabel1->blockSize == clabel2->blockSize) &&
2916 (clabel1->numBlocks == clabel2->numBlocks) &&
2917 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2918 (clabel1->root_partition == clabel2->root_partition) &&
2919 (clabel1->last_unit == clabel2->last_unit) &&
2920 (clabel1->config_order == clabel2->config_order)) {
2921 /* if it get's here, it almost *has* to be a match */
2922 } else {
2923 /* it's not consistent with somebody in the set..
2924 punt */
2925 return(0);
2926 }
2927 /* all was fine.. it must fit... */
2928 return(1);
2929 }
2930
2931 int
2932 rf_have_enough_components(RF_ConfigSet_t *cset)
2933 {
2934 RF_AutoConfig_t *ac;
2935 RF_AutoConfig_t *auto_config;
2936 RF_ComponentLabel_t *clabel;
2937 int c;
2938 int num_cols;
2939 int num_missing;
2940 int mod_counter;
2941 int mod_counter_found;
2942 int even_pair_failed;
2943 char parity_type;
2944
2945
2946 /* check to see that we have enough 'live' components
2947 of this set. If so, we can configure it if necessary */
2948
2949 num_cols = cset->ac->clabel->num_columns;
2950 parity_type = cset->ac->clabel->parityConfig;
2951
2952 /* XXX Check for duplicate components!?!?!? */
2953
2954 /* Determine what the mod_counter is supposed to be for this set. */
2955
2956 mod_counter_found = 0;
2957 mod_counter = 0;
2958 ac = cset->ac;
2959 while(ac!=NULL) {
2960 if (mod_counter_found==0) {
2961 mod_counter = ac->clabel->mod_counter;
2962 mod_counter_found = 1;
2963 } else {
2964 if (ac->clabel->mod_counter > mod_counter) {
2965 mod_counter = ac->clabel->mod_counter;
2966 }
2967 }
2968 ac = ac->next;
2969 }
2970
2971 num_missing = 0;
2972 auto_config = cset->ac;
2973
2974 even_pair_failed = 0;
2975 for(c=0; c<num_cols; c++) {
2976 ac = auto_config;
2977 while(ac!=NULL) {
2978 if ((ac->clabel->column == c) &&
2979 (ac->clabel->mod_counter == mod_counter)) {
2980 /* it's this one... */
2981 #if DEBUG
2982 printf("Found: %s at %d\n",
2983 ac->devname,c);
2984 #endif
2985 break;
2986 }
2987 ac=ac->next;
2988 }
2989 if (ac==NULL) {
2990 /* Didn't find one here! */
2991 /* special case for RAID 1, especially
2992 where there are more than 2
2993 components (where RAIDframe treats
2994 things a little differently :( ) */
2995 if (parity_type == '1') {
2996 if (c%2 == 0) { /* even component */
2997 even_pair_failed = 1;
2998 } else { /* odd component. If
2999 we're failed, and
3000 so is the even
3001 component, it's
3002 "Good Night, Charlie" */
3003 if (even_pair_failed == 1) {
3004 return(0);
3005 }
3006 }
3007 } else {
3008 /* normal accounting */
3009 num_missing++;
3010 }
3011 }
3012 if ((parity_type == '1') && (c%2 == 1)) {
3013 /* Just did an even component, and we didn't
3014 bail.. reset the even_pair_failed flag,
3015 and go on to the next component.... */
3016 even_pair_failed = 0;
3017 }
3018 }
3019
3020 clabel = cset->ac->clabel;
3021
3022 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3023 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3024 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3025 /* XXX this needs to be made *much* more general */
3026 /* Too many failures */
3027 return(0);
3028 }
3029 /* otherwise, all is well, and we've got enough to take a kick
3030 at autoconfiguring this set */
3031 return(1);
3032 }
3033
3034 void
3035 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3036 RF_Raid_t *raidPtr)
3037 {
3038 RF_ComponentLabel_t *clabel;
3039 int i;
3040
3041 clabel = ac->clabel;
3042
3043 /* 1. Fill in the common stuff */
3044 config->numRow = clabel->num_rows = 1;
3045 config->numCol = clabel->num_columns;
3046 config->numSpare = 0; /* XXX should this be set here? */
3047 config->sectPerSU = clabel->sectPerSU;
3048 config->SUsPerPU = clabel->SUsPerPU;
3049 config->SUsPerRU = clabel->SUsPerRU;
3050 config->parityConfig = clabel->parityConfig;
3051 /* XXX... */
3052 strcpy(config->diskQueueType,"fifo");
3053 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3054 config->layoutSpecificSize = 0; /* XXX ?? */
3055
3056 while(ac!=NULL) {
3057 /* row/col values will be in range due to the checks
3058 in reasonable_label() */
3059 strcpy(config->devnames[0][ac->clabel->column],
3060 ac->devname);
3061 ac = ac->next;
3062 }
3063
3064 for(i=0;i<RF_MAXDBGV;i++) {
3065 config->debugVars[i][0] = 0;
3066 }
3067 }
3068
3069 int
3070 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3071 {
3072 RF_ComponentLabel_t clabel;
3073 struct vnode *vp;
3074 dev_t dev;
3075 int column;
3076 int sparecol;
3077
3078 raidPtr->autoconfigure = new_value;
3079
3080 for(column=0; column<raidPtr->numCol; column++) {
3081 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3082 dev = raidPtr->Disks[column].dev;
3083 vp = raidPtr->raid_cinfo[column].ci_vp;
3084 raidread_component_label(dev, vp, &clabel);
3085 clabel.autoconfigure = new_value;
3086 raidwrite_component_label(dev, vp, &clabel);
3087 }
3088 }
3089 for(column = 0; column < raidPtr->numSpare ; column++) {
3090 sparecol = raidPtr->numCol + column;
3091 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3092 dev = raidPtr->Disks[sparecol].dev;
3093 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3094 raidread_component_label(dev, vp, &clabel);
3095 clabel.autoconfigure = new_value;
3096 raidwrite_component_label(dev, vp, &clabel);
3097 }
3098 }
3099 return(new_value);
3100 }
3101
3102 int
3103 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3104 {
3105 RF_ComponentLabel_t clabel;
3106 struct vnode *vp;
3107 dev_t dev;
3108 int column;
3109 int sparecol;
3110
3111 raidPtr->root_partition = new_value;
3112 for(column=0; column<raidPtr->numCol; column++) {
3113 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3114 dev = raidPtr->Disks[column].dev;
3115 vp = raidPtr->raid_cinfo[column].ci_vp;
3116 raidread_component_label(dev, vp, &clabel);
3117 clabel.root_partition = new_value;
3118 raidwrite_component_label(dev, vp, &clabel);
3119 }
3120 }
3121 for(column = 0; column < raidPtr->numSpare ; column++) {
3122 sparecol = raidPtr->numCol + column;
3123 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3124 dev = raidPtr->Disks[sparecol].dev;
3125 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3126 raidread_component_label(dev, vp, &clabel);
3127 clabel.root_partition = new_value;
3128 raidwrite_component_label(dev, vp, &clabel);
3129 }
3130 }
3131 return(new_value);
3132 }
3133
3134 void
3135 rf_release_all_vps(RF_ConfigSet_t *cset)
3136 {
3137 RF_AutoConfig_t *ac;
3138
3139 ac = cset->ac;
3140 while(ac!=NULL) {
3141 /* Close the vp, and give it back */
3142 if (ac->vp) {
3143 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3144 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3145 vput(ac->vp);
3146 ac->vp = NULL;
3147 }
3148 ac = ac->next;
3149 }
3150 }
3151
3152
3153 void
3154 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3155 {
3156 RF_AutoConfig_t *ac;
3157 RF_AutoConfig_t *next_ac;
3158
3159 ac = cset->ac;
3160 while(ac!=NULL) {
3161 next_ac = ac->next;
3162 /* nuke the label */
3163 free(ac->clabel, M_RAIDFRAME);
3164 /* cleanup the config structure */
3165 free(ac, M_RAIDFRAME);
3166 /* "next.." */
3167 ac = next_ac;
3168 }
3169 /* and, finally, nuke the config set */
3170 free(cset, M_RAIDFRAME);
3171 }
3172
3173
3174 void
3175 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3176 {
3177 /* current version number */
3178 clabel->version = RF_COMPONENT_LABEL_VERSION;
3179 clabel->serial_number = raidPtr->serial_number;
3180 clabel->mod_counter = raidPtr->mod_counter;
3181 clabel->num_rows = 1;
3182 clabel->num_columns = raidPtr->numCol;
3183 clabel->clean = RF_RAID_DIRTY; /* not clean */
3184 clabel->status = rf_ds_optimal; /* "It's good!" */
3185
3186 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3187 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3188 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3189
3190 clabel->blockSize = raidPtr->bytesPerSector;
3191 clabel->numBlocks = raidPtr->sectorsPerDisk;
3192
3193 /* XXX not portable */
3194 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3195 clabel->maxOutstanding = raidPtr->maxOutstanding;
3196 clabel->autoconfigure = raidPtr->autoconfigure;
3197 clabel->root_partition = raidPtr->root_partition;
3198 clabel->last_unit = raidPtr->raidid;
3199 clabel->config_order = raidPtr->config_order;
3200 }
3201
3202 int
3203 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3204 {
3205 RF_Raid_t *raidPtr;
3206 RF_Config_t *config;
3207 int raidID;
3208 int retcode;
3209
3210 #if DEBUG
3211 printf("RAID autoconfigure\n");
3212 #endif
3213
3214 retcode = 0;
3215 *unit = -1;
3216
3217 /* 1. Create a config structure */
3218
3219 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3220 M_RAIDFRAME,
3221 M_NOWAIT);
3222 if (config==NULL) {
3223 printf("Out of mem!?!?\n");
3224 /* XXX do something more intelligent here. */
3225 return(1);
3226 }
3227
3228 memset(config, 0, sizeof(RF_Config_t));
3229
3230 /*
3231 2. Figure out what RAID ID this one is supposed to live at
3232 See if we can get the same RAID dev that it was configured
3233 on last time..
3234 */
3235
3236 raidID = cset->ac->clabel->last_unit;
3237 if ((raidID < 0) || (raidID >= numraid)) {
3238 /* let's not wander off into lala land. */
3239 raidID = numraid - 1;
3240 }
3241 if (raidPtrs[raidID]->valid != 0) {
3242
3243 /*
3244 Nope... Go looking for an alternative...
3245 Start high so we don't immediately use raid0 if that's
3246 not taken.
3247 */
3248
3249 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3250 if (raidPtrs[raidID]->valid == 0) {
3251 /* can use this one! */
3252 break;
3253 }
3254 }
3255 }
3256
3257 if (raidID < 0) {
3258 /* punt... */
3259 printf("Unable to auto configure this set!\n");
3260 printf("(Out of RAID devs!)\n");
3261 free(config, M_RAIDFRAME);
3262 return(1);
3263 }
3264
3265 #if DEBUG
3266 printf("Configuring raid%d:\n",raidID);
3267 #endif
3268
3269 raidPtr = raidPtrs[raidID];
3270
3271 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3272 raidPtr->raidid = raidID;
3273 raidPtr->openings = RAIDOUTSTANDING;
3274
3275 /* 3. Build the configuration structure */
3276 rf_create_configuration(cset->ac, config, raidPtr);
3277
3278 /* 4. Do the configuration */
3279 retcode = rf_Configure(raidPtr, config, cset->ac);
3280
3281 if (retcode == 0) {
3282
3283 raidinit(raidPtrs[raidID]);
3284
3285 rf_markalldirty(raidPtrs[raidID]);
3286 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3287 if (cset->ac->clabel->root_partition==1) {
3288 /* everything configured just fine. Make a note
3289 that this set is eligible to be root. */
3290 cset->rootable = 1;
3291 /* XXX do this here? */
3292 raidPtrs[raidID]->root_partition = 1;
3293 }
3294 }
3295
3296 /* 5. Cleanup */
3297 free(config, M_RAIDFRAME);
3298
3299 *unit = raidID;
3300 return(retcode);
3301 }
3302
3303 void
3304 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3305 {
3306 struct buf *bp;
3307
3308 bp = (struct buf *)desc->bp;
3309 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3310 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3311 }
3312
3313 void
3314 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3315 size_t xmin, size_t xmax)
3316 {
3317 pool_init(p, size, 0, 0, 0, w_chan, NULL);
3318 pool_sethiwat(p, xmax);
3319 pool_prime(p, xmin);
3320 pool_setlowat(p, xmin);
3321 }
3322
3323 /*
3324 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3325 * if there is IO pending and if that IO could possibly be done for a
3326 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3327 * otherwise.
3328 *
3329 */
3330
3331 int
3332 rf_buf_queue_check(int raidid)
3333 {
3334 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3335 raidPtrs[raidid]->openings > 0) {
3336 /* there is work to do */
3337 return 0;
3338 }
3339 /* default is nothing to do */
3340 return 1;
3341 }
3342