rf_netbsdkintf.c revision 1.213 1 /* $NetBSD: rf_netbsdkintf.c,v 1.213 2006/08/27 05:07:13 christos Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.213 2006/08/27 05:07:13 christos Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
191 #ifdef DEBUG
192 int rf_kdebug_level = 0;
193 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
194 #else /* DEBUG */
195 #define db1_printf(a) { }
196 #endif /* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217
218 dev_type_open(raidopen);
219 dev_type_close(raidclose);
220 dev_type_read(raidread);
221 dev_type_write(raidwrite);
222 dev_type_ioctl(raidioctl);
223 dev_type_strategy(raidstrategy);
224 dev_type_dump(raiddump);
225 dev_type_size(raidsize);
226
227 const struct bdevsw raid_bdevsw = {
228 raidopen, raidclose, raidstrategy, raidioctl,
229 raiddump, raidsize, D_DISK
230 };
231
232 const struct cdevsw raid_cdevsw = {
233 raidopen, raidclose, raidread, raidwrite, raidioctl,
234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
235 };
236
237 /* XXX Not sure if the following should be replacing the raidPtrs above,
238 or if it should be used in conjunction with that...
239 */
240
/*
 * Per-unit software state for a RAID pseudo-disk.  One of these exists
 * for each possible unit, in the raid_softc[] array indexed by unit
 * number (allocated in raidattach()).
 */
struct raid_softc {
	int sc_flags;		/* state flags; see RAIDF_* below */
	int sc_cflags;		/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
255
256 #define raidunit(x) DISKUNIT(x)
257 int numraid = 0;
258
259 extern struct cfdriver raid_cd;
260
261 /*
262 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
263 * Be aware that large numbers can allow the driver to consume a lot of
264 * kernel memory, especially on writes, and in degraded mode reads.
265 *
266 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
267 * a single 64K write will typically require 64K for the old data,
268 * 64K for the old parity, and 64K for the new parity, for a total
269 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
271 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
272 *
273 * Now in degraded mode, for example, a 64K read on the above setup may
274 * require data reconstruction, which will require *all* of the 4 remaining
275 * disks to participate -- 4 * 32K/disk == 128K again.
276 */
277
278 #ifndef RAIDOUTSTANDING
279 #define RAIDOUTSTANDING 6
280 #endif
281
282 #define RAIDLABELDEV(dev) \
283 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
284
285 /* declared here, and made public, for the benefit of KVM stuff.. */
286 struct raid_softc *raid_softc;
287
288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
289 struct disklabel *);
290 static void raidgetdisklabel(dev_t);
291 static void raidmakedisklabel(struct raid_softc *);
292
293 static int raidlock(struct raid_softc *);
294 static void raidunlock(struct raid_softc *);
295
296 static void rf_markalldirty(RF_Raid_t *);
297
298 struct device *raidrootdev;
299
300 void rf_ReconThread(struct rf_recon_req *);
301 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
302 void rf_CopybackThread(RF_Raid_t *raidPtr);
303 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
304 int rf_autoconfig(struct device *self);
305 void rf_buildroothack(RF_ConfigSet_t *);
306
307 RF_AutoConfig_t *rf_find_raid_components(void);
308 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
310 static int rf_reasonable_label(RF_ComponentLabel_t *);
311 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
312 int rf_set_autoconfig(RF_Raid_t *, int);
313 int rf_set_rootpartition(RF_Raid_t *, int);
314 void rf_release_all_vps(RF_ConfigSet_t *);
315 void rf_cleanup_config_set(RF_ConfigSet_t *);
316 int rf_have_enough_components(RF_ConfigSet_t *);
317 int rf_auto_config_set(RF_ConfigSet_t *, int *);
318
319 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
320 allow autoconfig to take place.
321 Note that this is overridden by having
322 RAID_AUTOCONFIG as an option in the
323 kernel config file. */
324
325 struct RF_Pools_s rf_pools;
326
/*
 * Pseudo-device attach routine, called once at boot with the number of
 * units requested in the kernel configuration.  Allocates the global
 * raidPtrs[], raid_softc[] and raidrootdev[] arrays, boots the RAIDframe
 * core, and registers rf_autoconfig() as a config finalizer so that
 * auto-configuration runs after all real hardware has been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	/* Boot the RAIDframe core engine; nothing works without it. */
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	/* Per-unit setup: buffer queue, pseudo-disk, fake device node. */
	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
		pseudo_disk_init(&raid_softc[raidID].sc_dkdev);

		/* XXXJRT Should use config_attach_pseudo() */

		/* Hand-rolled struct device so raidN can be a root device. */
		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		raidrootdev[raidID].dv_cfdriver = &raid_cd;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
		raid_softc[raidID].sc_dkdev.dk_name =
		    raidrootdev[raidID].dv_xname;

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Truncate the set of usable units to what we got. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
422
423 int
424 rf_autoconfig(struct device *self)
425 {
426 RF_AutoConfig_t *ac_list;
427 RF_ConfigSet_t *config_sets;
428 int i;
429
430 if (raidautoconfig == 0)
431 return (0);
432
433 /* XXX This code can only be run once. */
434 raidautoconfig = 0;
435
436 /* 1. locate all RAID components on the system */
437 #ifdef DEBUG
438 printf("Searching for RAID components...\n");
439 #endif
440 ac_list = rf_find_raid_components();
441
442 /* 2. Sort them into their respective sets. */
443 config_sets = rf_create_auto_sets(ac_list);
444
445 /*
446 * 3. Evaluate each set andconfigure the valid ones.
447 * This gets done in rf_buildroothack().
448 */
449 rf_buildroothack(config_sets);
450
451 for (i = 0; i < numraid; i++)
452 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
453 dkwedge_discover(&raid_softc[i].sc_dkdev);
454
455 return 1;
456 }
457
458 void
459 rf_buildroothack(RF_ConfigSet_t *config_sets)
460 {
461 RF_ConfigSet_t *cset;
462 RF_ConfigSet_t *next_cset;
463 int retcode;
464 int raidID;
465 int rootID;
466 int num_root;
467
468 rootID = 0;
469 num_root = 0;
470 cset = config_sets;
471 while(cset != NULL ) {
472 next_cset = cset->next;
473 if (rf_have_enough_components(cset) &&
474 cset->ac->clabel->autoconfigure==1) {
475 retcode = rf_auto_config_set(cset,&raidID);
476 if (!retcode) {
477 #ifdef DEBUG
478 printf("raid%d: configured ok\n", raidID);
479 #endif
480 if (cset->rootable) {
481 rootID = raidID;
482 num_root++;
483 }
484 } else {
485 /* The autoconfig didn't work :( */
486 #if DEBUG
487 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
488 #endif
489 rf_release_all_vps(cset);
490 }
491 } else {
492 #ifdef DEBUG
493 printf("raid%d: not enough components\n", raidID);
494 #endif
495 /* we're not autoconfiguring this set...
496 release the associated resources */
497 rf_release_all_vps(cset);
498 }
499 /* cleanup */
500 rf_cleanup_config_set(cset);
501 cset = next_cset;
502 }
503
504 /* we found something bootable... */
505
506 if (num_root == 1) {
507 booted_device = &raidrootdev[rootID];
508 } else if (num_root > 1) {
509 /* we can't guess.. require the user to answer... */
510 boothowto |= RB_ASKNAME;
511 }
512 }
513
514
515 int
516 raidsize(dev_t dev)
517 {
518 struct raid_softc *rs;
519 struct disklabel *lp;
520 int part, unit, omask, size;
521
522 unit = raidunit(dev);
523 if (unit >= numraid)
524 return (-1);
525 rs = &raid_softc[unit];
526
527 if ((rs->sc_flags & RAIDF_INITED) == 0)
528 return (-1);
529
530 part = DISKPART(dev);
531 omask = rs->sc_dkdev.dk_openmask & (1 << part);
532 lp = rs->sc_dkdev.dk_label;
533
534 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
535 return (-1);
536
537 if (lp->d_partitions[part].p_fstype != FS_SWAP)
538 size = -1;
539 else
540 size = lp->d_partitions[part].p_size *
541 (lp->d_secsize / DEV_BSIZE);
542
543 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
544 return (-1);
545
546 return (size);
547
548 }
549
550 int
551 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
552 {
553 /* Not implemented. */
554 return ENXIO;
555 }
556 /* ARGSUSED */
/*
 * Open a partition of the RAID pseudo-disk.  Holds the per-unit lock
 * for the duration.  On the first open of an initialized unit the
 * disklabel is (re)read and all components are marked dirty; the open
 * is then recorded in the char/block open masks so the unit cannot be
 * unconfigured while in use.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
635 /* ARGSUSED */
/*
 * Close a partition of the RAID pseudo-disk.  Clears the partition's
 * bit in the open masks; on last close of an initialized unit the
 * component labels are marked clean, and if the system is shutting
 * down the RAID set itself is shut down and its disk detached.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
						 RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			pseudo_disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
692
693 void
694 raidstrategy(struct buf *bp)
695 {
696 int s;
697
698 unsigned int raidID = raidunit(bp->b_dev);
699 RF_Raid_t *raidPtr;
700 struct raid_softc *rs = &raid_softc[raidID];
701 int wlabel;
702
703 if ((rs->sc_flags & RAIDF_INITED) ==0) {
704 bp->b_error = ENXIO;
705 bp->b_flags |= B_ERROR;
706 goto done;
707 }
708 if (raidID >= numraid || !raidPtrs[raidID]) {
709 bp->b_error = ENODEV;
710 bp->b_flags |= B_ERROR;
711 goto done;
712 }
713 raidPtr = raidPtrs[raidID];
714 if (!raidPtr->valid) {
715 bp->b_error = ENODEV;
716 bp->b_flags |= B_ERROR;
717 goto done;
718 }
719 if (bp->b_bcount == 0) {
720 db1_printf(("b_bcount is zero..\n"));
721 goto done;
722 }
723
724 /*
725 * Do bounds checking and adjust transfer. If there's an
726 * error, the bounds check will flag that for us.
727 */
728
729 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
730 if (DISKPART(bp->b_dev) == RAW_PART) {
731 uint64_t size; /* device size in DEV_BSIZE unit */
732
733 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
734 size = raidPtr->totalSectors <<
735 (raidPtr->logBytesPerSector - DEV_BSHIFT);
736 } else {
737 size = raidPtr->totalSectors >>
738 (DEV_BSHIFT - raidPtr->logBytesPerSector);
739 }
740 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
741 goto done;
742 }
743 } else {
744 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
745 db1_printf(("Bounds check failed!!:%d %d\n",
746 (int) bp->b_blkno, (int) wlabel));
747 goto done;
748 }
749 }
750 s = splbio();
751
752 bp->b_resid = 0;
753
754 /* stuff it onto our queue */
755 BUFQ_PUT(rs->buf_queue, bp);
756
757 /* scheduled the IO to happen at the next convenient time */
758 wakeup(&(raidPtrs[raidID]->iodone));
759
760 splx(s);
761 return;
762
763 done:
764 bp->b_resid = bp->b_bcount;
765 biodone(bp);
766 }
767 /* ARGSUSED */
768 int
769 raidread(dev_t dev, struct uio *uio, int flags)
770 {
771 int unit = raidunit(dev);
772 struct raid_softc *rs;
773
774 if (unit >= numraid)
775 return (ENXIO);
776 rs = &raid_softc[unit];
777
778 if ((rs->sc_flags & RAIDF_INITED) == 0)
779 return (ENXIO);
780
781 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
782
783 }
784 /* ARGSUSED */
785 int
786 raidwrite(dev_t dev, struct uio *uio, int flags)
787 {
788 int unit = raidunit(dev);
789 struct raid_softc *rs;
790
791 if (unit >= numraid)
792 return (ENXIO);
793 rs = &raid_softc[unit];
794
795 if ((rs->sc_flags & RAIDF_INITED) == 0)
796 return (ENXIO);
797
798 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
799
800 }
801
802 int
803 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
804 {
805 int unit = raidunit(dev);
806 int error = 0;
807 int part, pmask;
808 struct raid_softc *rs;
809 RF_Config_t *k_cfg, *u_cfg;
810 RF_Raid_t *raidPtr;
811 RF_RaidDisk_t *diskPtr;
812 RF_AccTotals_t *totals;
813 RF_DeviceConfig_t *d_cfg, **ucfgp;
814 u_char *specific_buf;
815 int retcode = 0;
816 int column;
817 int raidid;
818 struct rf_recon_req *rrcopy, *rr;
819 RF_ComponentLabel_t *clabel;
820 RF_ComponentLabel_t *ci_label;
821 RF_ComponentLabel_t **clabel_ptr;
822 RF_SingleComponent_t *sparePtr,*componentPtr;
823 RF_SingleComponent_t component;
824 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
825 int i, j, d;
826 #ifdef __HAVE_OLD_DISKLABEL
827 struct disklabel newlabel;
828 #endif
829 struct dkwedge_info *dkw;
830
831 if (unit >= numraid)
832 return (ENXIO);
833 rs = &raid_softc[unit];
834 raidPtr = raidPtrs[unit];
835
836 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
837 (int) DISKPART(dev), (int) unit, (int) cmd));
838
839 /* Must be open for writes for these commands... */
840 switch (cmd) {
841 #ifdef DIOCGSECTORSIZE
842 case DIOCGSECTORSIZE:
843 *(u_int *)data = raidPtr->bytesPerSector;
844 return 0;
845 case DIOCGMEDIASIZE:
846 *(off_t *)data =
847 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
848 return 0;
849 #endif
850 case DIOCSDINFO:
851 case DIOCWDINFO:
852 #ifdef __HAVE_OLD_DISKLABEL
853 case ODIOCWDINFO:
854 case ODIOCSDINFO:
855 #endif
856 case DIOCWLABEL:
857 case DIOCAWEDGE:
858 case DIOCDWEDGE:
859 if ((flag & FWRITE) == 0)
860 return (EBADF);
861 }
862
863 /* Must be initialized for these... */
864 switch (cmd) {
865 case DIOCGDINFO:
866 case DIOCSDINFO:
867 case DIOCWDINFO:
868 #ifdef __HAVE_OLD_DISKLABEL
869 case ODIOCGDINFO:
870 case ODIOCWDINFO:
871 case ODIOCSDINFO:
872 case ODIOCGDEFLABEL:
873 #endif
874 case DIOCGPART:
875 case DIOCWLABEL:
876 case DIOCGDEFLABEL:
877 case DIOCAWEDGE:
878 case DIOCDWEDGE:
879 case DIOCLWEDGES:
880 case RAIDFRAME_SHUTDOWN:
881 case RAIDFRAME_REWRITEPARITY:
882 case RAIDFRAME_GET_INFO:
883 case RAIDFRAME_RESET_ACCTOTALS:
884 case RAIDFRAME_GET_ACCTOTALS:
885 case RAIDFRAME_KEEP_ACCTOTALS:
886 case RAIDFRAME_GET_SIZE:
887 case RAIDFRAME_FAIL_DISK:
888 case RAIDFRAME_COPYBACK:
889 case RAIDFRAME_CHECK_RECON_STATUS:
890 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
891 case RAIDFRAME_GET_COMPONENT_LABEL:
892 case RAIDFRAME_SET_COMPONENT_LABEL:
893 case RAIDFRAME_ADD_HOT_SPARE:
894 case RAIDFRAME_REMOVE_HOT_SPARE:
895 case RAIDFRAME_INIT_LABELS:
896 case RAIDFRAME_REBUILD_IN_PLACE:
897 case RAIDFRAME_CHECK_PARITY:
898 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
899 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
900 case RAIDFRAME_CHECK_COPYBACK_STATUS:
901 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
902 case RAIDFRAME_SET_AUTOCONFIG:
903 case RAIDFRAME_SET_ROOT:
904 case RAIDFRAME_DELETE_COMPONENT:
905 case RAIDFRAME_INCORPORATE_HOT_SPARE:
906 if ((rs->sc_flags & RAIDF_INITED) == 0)
907 return (ENXIO);
908 }
909
910 switch (cmd) {
911
912 /* configure the system */
913 case RAIDFRAME_CONFIGURE:
914
915 if (raidPtr->valid) {
916 /* There is a valid RAID set running on this unit! */
917 printf("raid%d: Device already configured!\n",unit);
918 return(EINVAL);
919 }
920
921 /* copy-in the configuration information */
922 /* data points to a pointer to the configuration structure */
923
924 u_cfg = *((RF_Config_t **) data);
925 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
926 if (k_cfg == NULL) {
927 return (ENOMEM);
928 }
929 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
930 if (retcode) {
931 RF_Free(k_cfg, sizeof(RF_Config_t));
932 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
933 retcode));
934 return (retcode);
935 }
936 /* allocate a buffer for the layout-specific data, and copy it
937 * in */
938 if (k_cfg->layoutSpecificSize) {
939 if (k_cfg->layoutSpecificSize > 10000) {
940 /* sanity check */
941 RF_Free(k_cfg, sizeof(RF_Config_t));
942 return (EINVAL);
943 }
944 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
945 (u_char *));
946 if (specific_buf == NULL) {
947 RF_Free(k_cfg, sizeof(RF_Config_t));
948 return (ENOMEM);
949 }
950 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
951 k_cfg->layoutSpecificSize);
952 if (retcode) {
953 RF_Free(k_cfg, sizeof(RF_Config_t));
954 RF_Free(specific_buf,
955 k_cfg->layoutSpecificSize);
956 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
957 retcode));
958 return (retcode);
959 }
960 } else
961 specific_buf = NULL;
962 k_cfg->layoutSpecific = specific_buf;
963
964 /* should do some kind of sanity check on the configuration.
965 * Store the sum of all the bytes in the last byte? */
966
967 /* configure the system */
968
969 /*
970 * Clear the entire RAID descriptor, just to make sure
971 * there is no stale data left in the case of a
972 * reconfiguration
973 */
974 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
975 raidPtr->raidid = unit;
976
977 retcode = rf_Configure(raidPtr, k_cfg, NULL);
978
979 if (retcode == 0) {
980
981 /* allow this many simultaneous IO's to
982 this RAID device */
983 raidPtr->openings = RAIDOUTSTANDING;
984
985 raidinit(raidPtr);
986 rf_markalldirty(raidPtr);
987 }
988 /* free the buffers. No return code here. */
989 if (k_cfg->layoutSpecificSize) {
990 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
991 }
992 RF_Free(k_cfg, sizeof(RF_Config_t));
993
994 return (retcode);
995
996 /* shutdown the system */
997 case RAIDFRAME_SHUTDOWN:
998
999 if ((error = raidlock(rs)) != 0)
1000 return (error);
1001
1002 /*
1003 * If somebody has a partition mounted, we shouldn't
1004 * shutdown.
1005 */
1006
1007 part = DISKPART(dev);
1008 pmask = (1 << part);
1009 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1010 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1011 (rs->sc_dkdev.dk_copenmask & pmask))) {
1012 raidunlock(rs);
1013 return (EBUSY);
1014 }
1015
1016 retcode = rf_Shutdown(raidPtr);
1017
1018 /* It's no longer initialized... */
1019 rs->sc_flags &= ~RAIDF_INITED;
1020
1021 /* Detach the disk. */
1022 pseudo_disk_detach(&rs->sc_dkdev);
1023
1024 raidunlock(rs);
1025
1026 return (retcode);
1027 case RAIDFRAME_GET_COMPONENT_LABEL:
1028 clabel_ptr = (RF_ComponentLabel_t **) data;
1029 /* need to read the component label for the disk indicated
1030 by row,column in clabel */
1031
	/* For practice, let's get it directly from disk, rather
1033 than from the in-core copy */
1034 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1035 (RF_ComponentLabel_t *));
1036 if (clabel == NULL)
1037 return (ENOMEM);
1038
1039 retcode = copyin( *clabel_ptr, clabel,
1040 sizeof(RF_ComponentLabel_t));
1041
1042 if (retcode) {
1043 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1044 return(retcode);
1045 }
1046
1047 clabel->row = 0; /* Don't allow looking at anything else.*/
1048
1049 column = clabel->column;
1050
1051 if ((column < 0) || (column >= raidPtr->numCol +
1052 raidPtr->numSpare)) {
1053 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1054 return(EINVAL);
1055 }
1056
1057 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1058 raidPtr->raid_cinfo[column].ci_vp,
1059 clabel );
1060
1061 if (retcode == 0) {
1062 retcode = copyout(clabel, *clabel_ptr,
1063 sizeof(RF_ComponentLabel_t));
1064 }
1065 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1066 return (retcode);
1067
1068 case RAIDFRAME_SET_COMPONENT_LABEL:
1069 clabel = (RF_ComponentLabel_t *) data;
1070
1071 /* XXX check the label for valid stuff... */
1072 /* Note that some things *should not* get modified --
1073 the user should be re-initing the labels instead of
1074 trying to patch things.
1075 */
1076
1077 raidid = raidPtr->raidid;
1078 #if DEBUG
1079 printf("raid%d: Got component label:\n", raidid);
1080 printf("raid%d: Version: %d\n", raidid, clabel->version);
1081 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1082 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1083 printf("raid%d: Column: %d\n", raidid, clabel->column);
1084 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1085 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1086 printf("raid%d: Status: %d\n", raidid, clabel->status);
1087 #endif
1088 clabel->row = 0;
1089 column = clabel->column;
1090
1091 if ((column < 0) || (column >= raidPtr->numCol)) {
1092 return(EINVAL);
1093 }
1094
1095 /* XXX this isn't allowed to do anything for now :-) */
1096
1097 /* XXX and before it is, we need to fill in the rest
1098 of the fields!?!?!?! */
1099 #if 0
1100 raidwrite_component_label(
1101 raidPtr->Disks[column].dev,
1102 raidPtr->raid_cinfo[column].ci_vp,
1103 clabel );
1104 #endif
1105 return (0);
1106
1107 case RAIDFRAME_INIT_LABELS:
1108 clabel = (RF_ComponentLabel_t *) data;
1109 /*
1110 we only want the serial number from
1111 the above. We get all the rest of the information
1112 from the config that was used to create this RAID
1113 set.
1114 */
1115
1116 raidPtr->serial_number = clabel->serial_number;
1117
1118 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1119 (RF_ComponentLabel_t *));
1120 if (ci_label == NULL)
1121 return (ENOMEM);
1122
1123 raid_init_component_label(raidPtr, ci_label);
1124 ci_label->serial_number = clabel->serial_number;
1125 ci_label->row = 0; /* we dont' pretend to support more */
1126
1127 for(column=0;column<raidPtr->numCol;column++) {
1128 diskPtr = &raidPtr->Disks[column];
1129 if (!RF_DEAD_DISK(diskPtr->status)) {
1130 ci_label->partitionSize = diskPtr->partitionSize;
1131 ci_label->column = column;
1132 raidwrite_component_label(
1133 raidPtr->Disks[column].dev,
1134 raidPtr->raid_cinfo[column].ci_vp,
1135 ci_label );
1136 }
1137 }
1138 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1139
1140 return (retcode);
1141 case RAIDFRAME_SET_AUTOCONFIG:
1142 d = rf_set_autoconfig(raidPtr, *(int *) data);
1143 printf("raid%d: New autoconfig value is: %d\n",
1144 raidPtr->raidid, d);
1145 *(int *) data = d;
1146 return (retcode);
1147
1148 case RAIDFRAME_SET_ROOT:
1149 d = rf_set_rootpartition(raidPtr, *(int *) data);
1150 printf("raid%d: New rootpartition value is: %d\n",
1151 raidPtr->raidid, d);
1152 *(int *) data = d;
1153 return (retcode);
1154
1155 /* initialize all parity */
1156 case RAIDFRAME_REWRITEPARITY:
1157
1158 if (raidPtr->Layout.map->faultsTolerated == 0) {
1159 /* Parity for RAID 0 is trivially correct */
1160 raidPtr->parity_good = RF_RAID_CLEAN;
1161 return(0);
1162 }
1163
1164 if (raidPtr->parity_rewrite_in_progress == 1) {
1165 /* Re-write is already in progress! */
1166 return(EINVAL);
1167 }
1168
1169 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1170 rf_RewriteParityThread,
1171 raidPtr,"raid_parity");
1172 return (retcode);
1173
1174
1175 case RAIDFRAME_ADD_HOT_SPARE:
1176 sparePtr = (RF_SingleComponent_t *) data;
1177 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1178 retcode = rf_add_hot_spare(raidPtr, &component);
1179 return(retcode);
1180
1181 case RAIDFRAME_REMOVE_HOT_SPARE:
1182 return(retcode);
1183
1184 case RAIDFRAME_DELETE_COMPONENT:
1185 componentPtr = (RF_SingleComponent_t *)data;
1186 memcpy( &component, componentPtr,
1187 sizeof(RF_SingleComponent_t));
1188 retcode = rf_delete_component(raidPtr, &component);
1189 return(retcode);
1190
1191 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1192 componentPtr = (RF_SingleComponent_t *)data;
1193 memcpy( &component, componentPtr,
1194 sizeof(RF_SingleComponent_t));
1195 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1196 return(retcode);
1197
1198 case RAIDFRAME_REBUILD_IN_PLACE:
1199
1200 if (raidPtr->Layout.map->faultsTolerated == 0) {
1201 /* Can't do this on a RAID 0!! */
1202 return(EINVAL);
1203 }
1204
1205 if (raidPtr->recon_in_progress == 1) {
1206 /* a reconstruct is already in progress! */
1207 return(EINVAL);
1208 }
1209
1210 componentPtr = (RF_SingleComponent_t *) data;
1211 memcpy( &component, componentPtr,
1212 sizeof(RF_SingleComponent_t));
1213 component.row = 0; /* we don't support any more */
1214 column = component.column;
1215
1216 if ((column < 0) || (column >= raidPtr->numCol)) {
1217 return(EINVAL);
1218 }
1219
1220 RF_LOCK_MUTEX(raidPtr->mutex);
1221 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1222 (raidPtr->numFailures > 0)) {
1223 /* XXX 0 above shouldn't be constant!!! */
1224 /* some component other than this has failed.
1225 Let's not make things worse than they already
1226 are... */
1227 printf("raid%d: Unable to reconstruct to disk at:\n",
1228 raidPtr->raidid);
1229 printf("raid%d: Col: %d Too many failures.\n",
1230 raidPtr->raidid, column);
1231 RF_UNLOCK_MUTEX(raidPtr->mutex);
1232 return (EINVAL);
1233 }
1234 if (raidPtr->Disks[column].status ==
1235 rf_ds_reconstructing) {
1236 printf("raid%d: Unable to reconstruct to disk at:\n",
1237 raidPtr->raidid);
1238 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1239
1240 RF_UNLOCK_MUTEX(raidPtr->mutex);
1241 return (EINVAL);
1242 }
1243 if (raidPtr->Disks[column].status == rf_ds_spared) {
1244 RF_UNLOCK_MUTEX(raidPtr->mutex);
1245 return (EINVAL);
1246 }
1247 RF_UNLOCK_MUTEX(raidPtr->mutex);
1248
1249 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1250 if (rrcopy == NULL)
1251 return(ENOMEM);
1252
1253 rrcopy->raidPtr = (void *) raidPtr;
1254 rrcopy->col = column;
1255
1256 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1257 rf_ReconstructInPlaceThread,
1258 rrcopy,"raid_reconip");
1259 return(retcode);
1260
1261 case RAIDFRAME_GET_INFO:
1262 if (!raidPtr->valid)
1263 return (ENODEV);
1264 ucfgp = (RF_DeviceConfig_t **) data;
1265 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1266 (RF_DeviceConfig_t *));
1267 if (d_cfg == NULL)
1268 return (ENOMEM);
1269 d_cfg->rows = 1; /* there is only 1 row now */
1270 d_cfg->cols = raidPtr->numCol;
1271 d_cfg->ndevs = raidPtr->numCol;
1272 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1273 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1274 return (ENOMEM);
1275 }
1276 d_cfg->nspares = raidPtr->numSpare;
1277 if (d_cfg->nspares >= RF_MAX_DISKS) {
1278 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1279 return (ENOMEM);
1280 }
1281 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1282 d = 0;
1283 for (j = 0; j < d_cfg->cols; j++) {
1284 d_cfg->devs[d] = raidPtr->Disks[j];
1285 d++;
1286 }
1287 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1288 d_cfg->spares[i] = raidPtr->Disks[j];
1289 }
1290 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1291 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1292
1293 return (retcode);
1294
1295 case RAIDFRAME_CHECK_PARITY:
1296 *(int *) data = raidPtr->parity_good;
1297 return (0);
1298
1299 case RAIDFRAME_RESET_ACCTOTALS:
1300 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1301 return (0);
1302
1303 case RAIDFRAME_GET_ACCTOTALS:
1304 totals = (RF_AccTotals_t *) data;
1305 *totals = raidPtr->acc_totals;
1306 return (0);
1307
1308 case RAIDFRAME_KEEP_ACCTOTALS:
1309 raidPtr->keep_acc_totals = *(int *)data;
1310 return (0);
1311
1312 case RAIDFRAME_GET_SIZE:
1313 *(int *) data = raidPtr->totalSectors;
1314 return (0);
1315
1316 /* fail a disk & optionally start reconstruction */
1317 case RAIDFRAME_FAIL_DISK:
1318
1319 if (raidPtr->Layout.map->faultsTolerated == 0) {
1320 /* Can't do this on a RAID 0!! */
1321 return(EINVAL);
1322 }
1323
1324 rr = (struct rf_recon_req *) data;
1325 rr->row = 0;
1326 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1327 return (EINVAL);
1328
1329
1330 RF_LOCK_MUTEX(raidPtr->mutex);
1331 if (raidPtr->status == rf_rs_reconstructing) {
1332 /* you can't fail a disk while we're reconstructing! */
1333 /* XXX wrong for RAID6 */
1334 RF_UNLOCK_MUTEX(raidPtr->mutex);
1335 return (EINVAL);
1336 }
1337 if ((raidPtr->Disks[rr->col].status ==
1338 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1339 /* some other component has failed. Let's not make
1340 things worse. XXX wrong for RAID6 */
1341 RF_UNLOCK_MUTEX(raidPtr->mutex);
1342 return (EINVAL);
1343 }
1344 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1345 /* Can't fail a spared disk! */
1346 RF_UNLOCK_MUTEX(raidPtr->mutex);
1347 return (EINVAL);
1348 }
1349 RF_UNLOCK_MUTEX(raidPtr->mutex);
1350
1351 /* make a copy of the recon request so that we don't rely on
1352 * the user's buffer */
1353 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1354 if (rrcopy == NULL)
1355 return(ENOMEM);
1356 memcpy(rrcopy, rr, sizeof(*rr));
1357 rrcopy->raidPtr = (void *) raidPtr;
1358
1359 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1360 rf_ReconThread,
1361 rrcopy,"raid_recon");
1362 return (0);
1363
1364 /* invoke a copyback operation after recon on whatever disk
1365 * needs it, if any */
1366 case RAIDFRAME_COPYBACK:
1367
1368 if (raidPtr->Layout.map->faultsTolerated == 0) {
1369 /* This makes no sense on a RAID 0!! */
1370 return(EINVAL);
1371 }
1372
1373 if (raidPtr->copyback_in_progress == 1) {
1374 /* Copyback is already in progress! */
1375 return(EINVAL);
1376 }
1377
1378 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1379 rf_CopybackThread,
1380 raidPtr,"raid_copyback");
1381 return (retcode);
1382
1383 /* return the percentage completion of reconstruction */
1384 case RAIDFRAME_CHECK_RECON_STATUS:
1385 if (raidPtr->Layout.map->faultsTolerated == 0) {
1386 /* This makes no sense on a RAID 0, so tell the
1387 user it's done. */
1388 *(int *) data = 100;
1389 return(0);
1390 }
1391 if (raidPtr->status != rf_rs_reconstructing)
1392 *(int *) data = 100;
1393 else {
1394 if (raidPtr->reconControl->numRUsTotal > 0) {
1395 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1396 } else {
1397 *(int *) data = 0;
1398 }
1399 }
1400 return (0);
1401 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1402 progressInfoPtr = (RF_ProgressInfo_t **) data;
1403 if (raidPtr->status != rf_rs_reconstructing) {
1404 progressInfo.remaining = 0;
1405 progressInfo.completed = 100;
1406 progressInfo.total = 100;
1407 } else {
1408 progressInfo.total =
1409 raidPtr->reconControl->numRUsTotal;
1410 progressInfo.completed =
1411 raidPtr->reconControl->numRUsComplete;
1412 progressInfo.remaining = progressInfo.total -
1413 progressInfo.completed;
1414 }
1415 retcode = copyout(&progressInfo, *progressInfoPtr,
1416 sizeof(RF_ProgressInfo_t));
1417 return (retcode);
1418
1419 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1420 if (raidPtr->Layout.map->faultsTolerated == 0) {
1421 /* This makes no sense on a RAID 0, so tell the
1422 user it's done. */
1423 *(int *) data = 100;
1424 return(0);
1425 }
1426 if (raidPtr->parity_rewrite_in_progress == 1) {
1427 *(int *) data = 100 *
1428 raidPtr->parity_rewrite_stripes_done /
1429 raidPtr->Layout.numStripe;
1430 } else {
1431 *(int *) data = 100;
1432 }
1433 return (0);
1434
1435 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1436 progressInfoPtr = (RF_ProgressInfo_t **) data;
1437 if (raidPtr->parity_rewrite_in_progress == 1) {
1438 progressInfo.total = raidPtr->Layout.numStripe;
1439 progressInfo.completed =
1440 raidPtr->parity_rewrite_stripes_done;
1441 progressInfo.remaining = progressInfo.total -
1442 progressInfo.completed;
1443 } else {
1444 progressInfo.remaining = 0;
1445 progressInfo.completed = 100;
1446 progressInfo.total = 100;
1447 }
1448 retcode = copyout(&progressInfo, *progressInfoPtr,
1449 sizeof(RF_ProgressInfo_t));
1450 return (retcode);
1451
1452 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1453 if (raidPtr->Layout.map->faultsTolerated == 0) {
1454 /* This makes no sense on a RAID 0 */
1455 *(int *) data = 100;
1456 return(0);
1457 }
1458 if (raidPtr->copyback_in_progress == 1) {
1459 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1460 raidPtr->Layout.numStripe;
1461 } else {
1462 *(int *) data = 100;
1463 }
1464 return (0);
1465
1466 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1467 progressInfoPtr = (RF_ProgressInfo_t **) data;
1468 if (raidPtr->copyback_in_progress == 1) {
1469 progressInfo.total = raidPtr->Layout.numStripe;
1470 progressInfo.completed =
1471 raidPtr->copyback_stripes_done;
1472 progressInfo.remaining = progressInfo.total -
1473 progressInfo.completed;
1474 } else {
1475 progressInfo.remaining = 0;
1476 progressInfo.completed = 100;
1477 progressInfo.total = 100;
1478 }
1479 retcode = copyout(&progressInfo, *progressInfoPtr,
1480 sizeof(RF_ProgressInfo_t));
1481 return (retcode);
1482
1483 /* the sparetable daemon calls this to wait for the kernel to
1484 * need a spare table. this ioctl does not return until a
1485 * spare table is needed. XXX -- calling mpsleep here in the
1486 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1487 * -- I should either compute the spare table in the kernel,
1488 * or have a different -- XXX XXX -- interface (a different
1489 * character device) for delivering the table -- XXX */
1490 #if 0
1491 case RAIDFRAME_SPARET_WAIT:
1492 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1493 while (!rf_sparet_wait_queue)
1494 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1495 waitreq = rf_sparet_wait_queue;
1496 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1497 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1498
1499 /* structure assignment */
1500 *((RF_SparetWait_t *) data) = *waitreq;
1501
1502 RF_Free(waitreq, sizeof(*waitreq));
1503 return (0);
1504
1505 /* wakes up a process waiting on SPARET_WAIT and puts an error
1506 * code in it that will cause the dameon to exit */
1507 case RAIDFRAME_ABORT_SPARET_WAIT:
1508 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1509 waitreq->fcol = -1;
1510 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1511 waitreq->next = rf_sparet_wait_queue;
1512 rf_sparet_wait_queue = waitreq;
1513 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1514 wakeup(&rf_sparet_wait_queue);
1515 return (0);
1516
1517 /* used by the spare table daemon to deliver a spare table
1518 * into the kernel */
1519 case RAIDFRAME_SEND_SPARET:
1520
1521 /* install the spare table */
1522 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1523
1524 /* respond to the requestor. the return status of the spare
1525 * table installation is passed in the "fcol" field */
1526 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1527 waitreq->fcol = retcode;
1528 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1529 waitreq->next = rf_sparet_resp_queue;
1530 rf_sparet_resp_queue = waitreq;
1531 wakeup(&rf_sparet_resp_queue);
1532 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1533
1534 return (retcode);
1535 #endif
1536
1537 default:
1538 break; /* fall through to the os-specific code below */
1539
1540 }
1541
1542 if (!raidPtr->valid)
1543 return (EINVAL);
1544
1545 /*
1546 * Add support for "regular" device ioctls here.
1547 */
1548
1549 switch (cmd) {
1550 case DIOCGDINFO:
1551 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1552 break;
1553 #ifdef __HAVE_OLD_DISKLABEL
1554 case ODIOCGDINFO:
1555 newlabel = *(rs->sc_dkdev.dk_label);
1556 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1557 return ENOTTY;
1558 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1559 break;
1560 #endif
1561
1562 case DIOCGPART:
1563 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1564 ((struct partinfo *) data)->part =
1565 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1566 break;
1567
1568 case DIOCWDINFO:
1569 case DIOCSDINFO:
1570 #ifdef __HAVE_OLD_DISKLABEL
1571 case ODIOCWDINFO:
1572 case ODIOCSDINFO:
1573 #endif
1574 {
1575 struct disklabel *lp;
1576 #ifdef __HAVE_OLD_DISKLABEL
1577 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1578 memset(&newlabel, 0, sizeof newlabel);
1579 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1580 lp = &newlabel;
1581 } else
1582 #endif
1583 lp = (struct disklabel *)data;
1584
1585 if ((error = raidlock(rs)) != 0)
1586 return (error);
1587
1588 rs->sc_flags |= RAIDF_LABELLING;
1589
1590 error = setdisklabel(rs->sc_dkdev.dk_label,
1591 lp, 0, rs->sc_dkdev.dk_cpulabel);
1592 if (error == 0) {
1593 if (cmd == DIOCWDINFO
1594 #ifdef __HAVE_OLD_DISKLABEL
1595 || cmd == ODIOCWDINFO
1596 #endif
1597 )
1598 error = writedisklabel(RAIDLABELDEV(dev),
1599 raidstrategy, rs->sc_dkdev.dk_label,
1600 rs->sc_dkdev.dk_cpulabel);
1601 }
1602 rs->sc_flags &= ~RAIDF_LABELLING;
1603
1604 raidunlock(rs);
1605
1606 if (error)
1607 return (error);
1608 break;
1609 }
1610
1611 case DIOCWLABEL:
1612 if (*(int *) data != 0)
1613 rs->sc_flags |= RAIDF_WLABEL;
1614 else
1615 rs->sc_flags &= ~RAIDF_WLABEL;
1616 break;
1617
1618 case DIOCGDEFLABEL:
1619 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1620 break;
1621
1622 #ifdef __HAVE_OLD_DISKLABEL
1623 case ODIOCGDEFLABEL:
1624 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1625 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1626 return ENOTTY;
1627 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1628 break;
1629 #endif
1630
1631 case DIOCAWEDGE:
1632 case DIOCDWEDGE:
1633 dkw = (void *)data;
1634
1635 /* If the ioctl happens here, the parent is us. */
1636 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1637 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1638
1639 case DIOCLWEDGES:
1640 return dkwedge_list(&rs->sc_dkdev,
1641 (struct dkwedge_list *)data, l);
1642
1643 default:
1644 retcode = ENOTTY;
1645 }
1646 return (retcode);
1647
1648 }
1649
1650
1651 /* raidinit -- complete the rest of the initialization for the
1652 RAIDframe device. */
1653
1654
1655 static void
1656 raidinit(RF_Raid_t *raidPtr)
1657 {
1658 struct raid_softc *rs;
1659 int unit;
1660
1661 unit = raidPtr->raidid;
1662
1663 rs = &raid_softc[unit];
1664
1665 /* XXX should check return code first... */
1666 rs->sc_flags |= RAIDF_INITED;
1667
1668 /* XXX doesn't check bounds. */
1669 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1670
1671 rs->sc_dkdev.dk_name = rs->sc_xname;
1672
1673 /* disk_attach actually creates space for the CPU disklabel, among
1674 * other things, so it's critical to call this *BEFORE* we try putzing
1675 * with disklabels. */
1676
1677 pseudo_disk_attach(&rs->sc_dkdev);
1678
1679 /* XXX There may be a weird interaction here between this, and
1680 * protectedSectors, as used in RAIDframe. */
1681
1682 rs->sc_size = raidPtr->totalSectors;
1683 }
1684 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
/*
 * Hand a spare-table request to the user-land sparetable daemon and
 * sleep until the daemon delivers a response.
 *
 * req is queued on rf_sparet_wait_queue (consumed by the daemon via
 * RAIDFRAME_SPARET_WAIT); the response arrives on rf_sparet_resp_queue
 * (posted by RAIDFRAME_SEND_SPARET).  Returns the "fcol" status field
 * of the response.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Publish the request and wake any daemon blocked on the
	 * wait queue. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above is stale -- tsleep() is used
	 * here, not mpsleep(), and tsleep() does not release
	 * rf_sparet_wait_mutex.  Verify that sleeping while this mutex
	 * is held is intentional/safe -- TODO confirm. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the response off the head of the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the fcol field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1718 #endif
1719
1720 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1721 * bp & passes it down.
1722 * any calls originating in the kernel must use non-blocking I/O
1723 * do some extra sanity checking to return "appropriate" error values for
1724 * certain conditions (to make some standard utilities work)
1725 *
1726 * Formerly known as: rf_DoAccessKernel
1727 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* A component failed since the last pass: push updated
		 * component labels out before issuing more I/O.  The
		 * label update is done with the mutex dropped. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Each loop iteration consumes one "opening"; the mutex is held
	 * at the top of each iteration and dropped while we work. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run off the end of the array; the
		 * "sum < x" comparisons catch arithmetic wrap-around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one opening for this request. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		/* On submission failure, complete the buf with the error;
		 * successful submissions complete via KernelWakeupFunc. */
		if (rc) {
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1851
1852
1853
1854
1855 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1856
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		    numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the double parentheses below are harmless
		 * (parenthesized expression) but look like this was meant
		 * to be db1_printf -- confirm before changing. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately through the normal I/O
		 * completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device; completion is
		 * delivered to KernelWakeupFunc with req as the argument. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
		    req->type, queue->raidPtr->raidid,
		    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
			queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));
		/* Hand the buf to the component's strategy routine. */
		VOP_STRATEGY(bp->b_vp, bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1931 /* this is the callback function associated with a I/O invoked from
1932 kernel code.
1933 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	/* Block disk interrupts while we update queue/array state. */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* The originating request was stashed in b_private by InitBP
	 * (or by the NOP path in rf_DispatchKernelIO). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is noticed by raidstart(), which
			 * triggers a component-label update. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2003
2004
2005
2006 /*
2007 * initialize a buf structure for doing an I/O in the kernel.
2008 */
2009 static void
2010 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2011 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
2012 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2013 struct proc *b_proc)
2014 {
2015 /* bp->b_flags = B_PHYS | rw_flag; */
2016 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2017 bp->b_bcount = numSect << logBytesPerSector;
2018 bp->b_bufsize = bp->b_bcount;
2019 bp->b_error = 0;
2020 bp->b_dev = dev;
2021 bp->b_data = bf;
2022 bp->b_blkno = startSect;
2023 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2024 if (bp->b_bcount == 0) {
2025 panic("bp->b_bcount is zero in InitBP!!");
2026 }
2027 bp->b_proc = b_proc;
2028 bp->b_iodone = cbFunc;
2029 bp->b_private = cbArg;
2030 bp->b_vp = b_vp;
2031 if ((bp->b_flags & B_READ) == 0) {
2032 bp->b_vp->v_numoutput++;
2033 }
2034
2035 }
2036
2037 static void
2038 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2039 struct disklabel *lp)
2040 {
2041 memset(lp, 0, sizeof(*lp));
2042
2043 /* fabricate a label... */
2044 lp->d_secperunit = raidPtr->totalSectors;
2045 lp->d_secsize = raidPtr->bytesPerSector;
2046 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2047 lp->d_ntracks = 4 * raidPtr->numCol;
2048 lp->d_ncylinders = raidPtr->totalSectors /
2049 (lp->d_nsectors * lp->d_ntracks);
2050 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2051
2052 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2053 lp->d_type = DTYPE_RAID;
2054 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2055 lp->d_rpm = 3600;
2056 lp->d_interleave = 1;
2057 lp->d_flags = 0;
2058
2059 lp->d_partitions[RAW_PART].p_offset = 0;
2060 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2061 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2062 lp->d_npartitions = RAW_PART + 1;
2063
2064 lp->d_magic = DISKMAGIC;
2065 lp->d_magic2 = DISKMAGIC;
2066 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2067
2068 }
2069 /*
2070 * Read the disklabel from the raid device. If one is not present, fake one
2071 * up.
2072 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default in case nothing is on disk. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No usable on-disk label: synthesize one. */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* Warn (but do not reject) if the on-disk label disagrees
		 * with the current size of the array. */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2125 /*
2126 * Take care of things one might want to take care of in the event
2127 * that a disklabel isn't present.
2128 */
2129 static void
2130 raidmakedisklabel(struct raid_softc *rs)
2131 {
2132 struct disklabel *lp = rs->sc_dkdev.dk_label;
2133 db1_printf(("Making a label..\n"));
2134
2135 /*
2136 * For historical reasons, if there's no disklabel present
2137 * the raw partition must be marked FS_BSDFFS.
2138 */
2139
2140 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2141
2142 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2143
2144 lp->d_checksum = dkcksum(lp);
2145 }
2146 /*
2147 * Wait interruptibly for an exclusive lock.
2148 *
2149 * XXX
2150 * Several drivers do this; it should be abstracted and made MP-safe.
2151 * (Hmm... where have we seen this warning before :-> GO )
2152 */
2153 static int
2154 raidlock(struct raid_softc *rs)
2155 {
2156 int error;
2157
2158 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2159 rs->sc_flags |= RAIDF_WANTED;
2160 if ((error =
2161 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2162 return (error);
2163 }
2164 rs->sc_flags |= RAIDF_LOCKED;
2165 return (0);
2166 }
2167 /*
2168 * Unlock and wake up any waiters.
2169 */
2170 static void
2171 raidunlock(struct raid_softc *rs)
2172 {
2173
2174 rs->sc_flags &= ~RAIDF_LOCKED;
2175 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2176 rs->sc_flags &= ~RAIDF_WANTED;
2177 wakeup(rs);
2178 }
2179 }
2180
2181
2182 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2183 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2184
2185 int
2186 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2187 {
2188 RF_ComponentLabel_t clabel;
2189 raidread_component_label(dev, b_vp, &clabel);
2190 clabel.mod_counter = mod_counter;
2191 clabel.clean = RF_RAID_CLEAN;
2192 raidwrite_component_label(dev, b_vp, &clabel);
2193 return(0);
2194 }
2195
2196
2197 int
2198 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2199 {
2200 RF_ComponentLabel_t clabel;
2201 raidread_component_label(dev, b_vp, &clabel);
2202 clabel.mod_counter = mod_counter;
2203 clabel.clean = RF_RAID_DIRTY;
2204 raidwrite_component_label(dev, b_vp, &clabel);
2205 return(0);
2206 }
2207
2208 /* ARGSUSED */
2209 int
2210 raidread_component_label(dev_t dev, struct vnode *b_vp,
2211 RF_ComponentLabel_t *clabel)
2212 {
2213 struct buf *bp;
2214 const struct bdevsw *bdev;
2215 int error;
2216
2217 /* XXX should probably ensure that we don't try to do this if
2218 someone has changed rf_protected_sectors. */
2219
2220 if (b_vp == NULL) {
2221 /* For whatever reason, this component is not valid.
2222 Don't try to read a component label from it. */
2223 return(EINVAL);
2224 }
2225
2226 /* get a block of the appropriate size... */
2227 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2228 bp->b_dev = dev;
2229
2230 /* get our ducks in a row for the read */
2231 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2232 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2233 bp->b_flags |= B_READ;
2234 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2235
2236 bdev = bdevsw_lookup(bp->b_dev);
2237 if (bdev == NULL)
2238 return (ENXIO);
2239 (*bdev->d_strategy)(bp);
2240
2241 error = biowait(bp);
2242
2243 if (!error) {
2244 memcpy(clabel, bp->b_data,
2245 sizeof(RF_ComponentLabel_t));
2246 }
2247
2248 brelse(bp);
2249 return(error);
2250 }
2251 /* ARGSUSED */
2252 int
2253 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2254 RF_ComponentLabel_t *clabel)
2255 {
2256 struct buf *bp;
2257 const struct bdevsw *bdev;
2258 int error;
2259
2260 /* get a block of the appropriate size... */
2261 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2262 bp->b_dev = dev;
2263
2264 /* get our ducks in a row for the write */
2265 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2266 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2267 bp->b_flags |= B_WRITE;
2268 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2269
2270 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2271
2272 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2273
2274 bdev = bdevsw_lookup(bp->b_dev);
2275 if (bdev == NULL)
2276 return (ENXIO);
2277 (*bdev->d_strategy)(bp);
2278 error = biowait(bp);
2279 brelse(bp);
2280 if (error) {
2281 #if 1
2282 printf("Failed to write RAID component info!\n");
2283 #endif
2284 }
2285
2286 return(error);
2287 }
2288
/*
 * Bump the set's modification counter and mark every live component
 * (and every in-use spare) dirty on disk.  Failed components are left
 * strictly alone.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	/* Now handle spares that have been pressed into service. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			*/

			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Rebuild the label from current set parameters. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2359
2360
/*
 * Push fresh component labels (with a bumped mod_counter) out to every
 * optimal component and every in-use spare.  When `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark the
 * components clean -- this is the normal shutdown path.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;

			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[c].dev,
						      raidPtr->raid_cinfo[c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			*/

			/* Find the column this spare substitutes for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* Rebuild the label from current set parameters. */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2452
/*
 * Close a component's vnode.  Auto-configured components were opened
 * internally with VOP_OPEN() and are closed the same way; manually
 * configured ones came in via vn_open() on behalf of a process, so
 * they go back through vn_close() with the engine thread's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{
	struct proc *p;
	struct lwp *l;

	/* NOTE(review): uses the engine thread's cred/lwp for the close. */
	p = raidPtr->engine_thread;
	l = LIST_FIRST(&p->p_lwps);

	if (vp != NULL) {
		if (auto_configured == 1) {
			/* VOP_CLOSE requires the vnode locked; vput unlocks
			   and releases our reference. */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
		}
	}
}
2473
2474
2475 void
2476 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2477 {
2478 int r,c;
2479 struct vnode *vp;
2480 int acd;
2481
2482
2483 /* We take this opportunity to close the vnodes like we should.. */
2484
2485 for (c = 0; c < raidPtr->numCol; c++) {
2486 vp = raidPtr->raid_cinfo[c].ci_vp;
2487 acd = raidPtr->Disks[c].auto_configured;
2488 rf_close_component(raidPtr, vp, acd);
2489 raidPtr->raid_cinfo[c].ci_vp = NULL;
2490 raidPtr->Disks[c].auto_configured = 0;
2491 }
2492
2493 for (r = 0; r < raidPtr->numSpare; r++) {
2494 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2495 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2496 rf_close_component(raidPtr, vp, acd);
2497 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2498 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2499 }
2500 }
2501
2502
2503 void
2504 rf_ReconThread(struct rf_recon_req *req)
2505 {
2506 int s;
2507 RF_Raid_t *raidPtr;
2508
2509 s = splbio();
2510 raidPtr = (RF_Raid_t *) req->raidPtr;
2511 raidPtr->recon_in_progress = 1;
2512
2513 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2514 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2515
2516 RF_Free(req, sizeof(*req));
2517
2518 raidPtr->recon_in_progress = 0;
2519 splx(s);
2520
2521 /* That's all... */
2522 kthread_exit(0); /* does not return */
2523 }
2524
/*
 * Kernel thread body: rewrite all parity for the set.  On success the
 * set is flagged clean so the clean bit lands on the component labels
 * at shutdown.  Exits when done.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2554
2555
2556 void
2557 rf_CopybackThread(RF_Raid_t *raidPtr)
2558 {
2559 int s;
2560
2561 raidPtr->copyback_in_progress = 1;
2562 s = splbio();
2563 rf_CopybackReconstructedData(raidPtr);
2564 splx(s);
2565 raidPtr->copyback_in_progress = 0;
2566
2567 /* That's all... */
2568 kthread_exit(0); /* does not return */
2569 }
2570
2571
2572 void
2573 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2574 {
2575 int s;
2576 RF_Raid_t *raidPtr;
2577
2578 s = splbio();
2579 raidPtr = req->raidPtr;
2580 raidPtr->recon_in_progress = 1;
2581 rf_ReconstructInPlace(raidPtr, req->col);
2582 RF_Free(req, sizeof(*req));
2583 raidPtr->recon_in_progress = 0;
2584 splx(s);
2585
2586 /* That's all... */
2587 kthread_exit(0); /* does not return */
2588 }
2589
/*
 * Try to read a component label from (dev, vp).  If it looks like a
 * plausible RAIDframe component, prepend a new RF_AutoConfig_t to
 * ac_list and return the new list head; vp's reference is then owned
 * by the list entry.  Otherwise the vnode is closed and released here.
 * On allocation failure the whole ac_list is torn down and NULL is
 * returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: free every entry accumulated so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
#if DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* list entry now owns the vnode ref */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: drop the label and our vnode reference. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);
	}
	return ac_list;
}
2646
2647 RF_AutoConfig_t *
2648 rf_find_raid_components()
2649 {
2650 struct vnode *vp;
2651 struct disklabel label;
2652 struct device *dv;
2653 dev_t dev;
2654 int bmajor, bminor, wedge;
2655 int error;
2656 int i;
2657 RF_AutoConfig_t *ac_list;
2658
2659
2660 /* initialize the AutoConfig list */
2661 ac_list = NULL;
2662
2663 /* we begin by trolling through *all* the devices on the system */
2664
2665 for (dv = alldevs.tqh_first; dv != NULL;
2666 dv = dv->dv_list.tqe_next) {
2667
2668 /* we are only interested in disks... */
2669 if (device_class(dv) != DV_DISK)
2670 continue;
2671
2672 /* we don't care about floppies... */
2673 if (device_is_a(dv, "fd")) {
2674 continue;
2675 }
2676
2677 /* we don't care about CD's... */
2678 if (device_is_a(dv, "cd")) {
2679 continue;
2680 }
2681
2682 /* hdfd is the Atari/Hades floppy driver */
2683 if (device_is_a(dv, "hdfd")) {
2684 continue;
2685 }
2686
2687 /* fdisa is the Atari/Milan floppy driver */
2688 if (device_is_a(dv, "fdisa")) {
2689 continue;
2690 }
2691
2692 /* need to find the device_name_to_block_device_major stuff */
2693 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2694
2695 /* get a vnode for the raw partition of this disk */
2696
2697 wedge = device_is_a(dv, "dk");
2698 bminor = minor(device_unit(dv));
2699 dev = wedge ? makedev(bmajor, bminor) :
2700 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2701 if (bdevvp(dev, &vp))
2702 panic("RAID can't alloc vnode");
2703
2704 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2705
2706 if (error) {
2707 /* "Who cares." Continue looking
2708 for something that exists*/
2709 vput(vp);
2710 continue;
2711 }
2712
2713 if (wedge) {
2714 struct dkwedge_info dkw;
2715 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2716 NOCRED, 0);
2717 if (error) {
2718 printf("RAIDframe: can't get wedge info for "
2719 "dev %s (%d)\n", dv->dv_xname, error);
2720 out:
2721 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2722 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2723 vput(vp);
2724 continue;
2725 }
2726
2727 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
2728 goto out;
2729
2730 ac_list = rf_get_component(ac_list, dev, vp,
2731 dv->dv_xname, dkw.dkw_size);
2732 continue;
2733 }
2734
2735 /* Ok, the disk exists. Go get the disklabel. */
2736 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2737 if (error) {
2738 /*
2739 * XXX can't happen - open() would
2740 * have errored out (or faked up one)
2741 */
2742 if (error != ENOTTY)
2743 printf("RAIDframe: can't get label for dev "
2744 "%s (%d)\n", dv->dv_xname, error);
2745 }
2746
2747 /* don't need this any more. We'll allocate it again
2748 a little later if we really do... */
2749 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2750 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2751 vput(vp);
2752
2753 if (error)
2754 continue;
2755
2756 for (i = 0; i < label.d_npartitions; i++) {
2757 char cname[sizeof(ac_list->devname)];
2758
2759 /* We only support partitions marked as RAID */
2760 if (label.d_partitions[i].p_fstype != FS_RAID)
2761 continue;
2762
2763 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2764 if (bdevvp(dev, &vp))
2765 panic("RAID can't alloc vnode");
2766
2767 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2768 if (error) {
2769 /* Whatever... */
2770 vput(vp);
2771 continue;
2772 }
2773 snprintf(cname, sizeof(cname), "%s%c",
2774 dv->dv_xname, 'a' + i);
2775 ac_list = rf_get_component(ac_list, dev, vp, cname,
2776 label.d_partitions[i].p_size);
2777 }
2778 }
2779 return ac_list;
2780 }
2781
2782
2783 static int
2784 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2785 {
2786
2787 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2788 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2789 ((clabel->clean == RF_RAID_CLEAN) ||
2790 (clabel->clean == RF_RAID_DIRTY)) &&
2791 clabel->row >=0 &&
2792 clabel->column >= 0 &&
2793 clabel->num_rows > 0 &&
2794 clabel->num_columns > 0 &&
2795 clabel->row < clabel->num_rows &&
2796 clabel->column < clabel->num_columns &&
2797 clabel->blockSize > 0 &&
2798 clabel->numBlocks > 0) {
2799 /* label looks reasonable enough... */
2800 return(1);
2801 }
2802 return(0);
2803 }
2804
2805
#if DEBUG
/*
 * Debug helper: dump every interesting field of a component label to
 * the console.  Compiled only when DEBUG is set.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
2833
2834 RF_ConfigSet_t *
2835 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2836 {
2837 RF_AutoConfig_t *ac;
2838 RF_ConfigSet_t *config_sets;
2839 RF_ConfigSet_t *cset;
2840 RF_AutoConfig_t *ac_next;
2841
2842
2843 config_sets = NULL;
2844
2845 /* Go through the AutoConfig list, and figure out which components
2846 belong to what sets. */
2847 ac = ac_list;
2848 while(ac!=NULL) {
2849 /* we're going to putz with ac->next, so save it here
2850 for use at the end of the loop */
2851 ac_next = ac->next;
2852
2853 if (config_sets == NULL) {
2854 /* will need at least this one... */
2855 config_sets = (RF_ConfigSet_t *)
2856 malloc(sizeof(RF_ConfigSet_t),
2857 M_RAIDFRAME, M_NOWAIT);
2858 if (config_sets == NULL) {
2859 panic("rf_create_auto_sets: No memory!");
2860 }
2861 /* this one is easy :) */
2862 config_sets->ac = ac;
2863 config_sets->next = NULL;
2864 config_sets->rootable = 0;
2865 ac->next = NULL;
2866 } else {
2867 /* which set does this component fit into? */
2868 cset = config_sets;
2869 while(cset!=NULL) {
2870 if (rf_does_it_fit(cset, ac)) {
2871 /* looks like it matches... */
2872 ac->next = cset->ac;
2873 cset->ac = ac;
2874 break;
2875 }
2876 cset = cset->next;
2877 }
2878 if (cset==NULL) {
2879 /* didn't find a match above... new set..*/
2880 cset = (RF_ConfigSet_t *)
2881 malloc(sizeof(RF_ConfigSet_t),
2882 M_RAIDFRAME, M_NOWAIT);
2883 if (cset == NULL) {
2884 panic("rf_create_auto_sets: No memory!");
2885 }
2886 cset->ac = ac;
2887 ac->next = NULL;
2888 cset->next = config_sets;
2889 cset->rootable = 0;
2890 config_sets = cset;
2891 }
2892 }
2893 ac = ac_next;
2894 }
2895
2896
2897 return(config_sets);
2898 }
2899
2900 static int
2901 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2902 {
2903 RF_ComponentLabel_t *clabel1, *clabel2;
2904
2905 /* If this one matches the *first* one in the set, that's good
2906 enough, since the other members of the set would have been
2907 through here too... */
2908 /* note that we are not checking partitionSize here..
2909
2910 Note that we are also not checking the mod_counters here.
2911 If everything else matches execpt the mod_counter, that's
2912 good enough for this test. We will deal with the mod_counters
2913 a little later in the autoconfiguration process.
2914
2915 (clabel1->mod_counter == clabel2->mod_counter) &&
2916
2917 The reason we don't check for this is that failed disks
2918 will have lower modification counts. If those disks are
2919 not added to the set they used to belong to, then they will
2920 form their own set, which may result in 2 different sets,
2921 for example, competing to be configured at raid0, and
2922 perhaps competing to be the root filesystem set. If the
2923 wrong ones get configured, or both attempt to become /,
2924 weird behaviour and or serious lossage will occur. Thus we
2925 need to bring them into the fold here, and kick them out at
2926 a later point.
2927
2928 */
2929
2930 clabel1 = cset->ac->clabel;
2931 clabel2 = ac->clabel;
2932 if ((clabel1->version == clabel2->version) &&
2933 (clabel1->serial_number == clabel2->serial_number) &&
2934 (clabel1->num_rows == clabel2->num_rows) &&
2935 (clabel1->num_columns == clabel2->num_columns) &&
2936 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2937 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2938 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2939 (clabel1->parityConfig == clabel2->parityConfig) &&
2940 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2941 (clabel1->blockSize == clabel2->blockSize) &&
2942 (clabel1->numBlocks == clabel2->numBlocks) &&
2943 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2944 (clabel1->root_partition == clabel2->root_partition) &&
2945 (clabel1->last_unit == clabel2->last_unit) &&
2946 (clabel1->config_order == clabel2->config_order)) {
2947 /* if it get's here, it almost *has* to be a match */
2948 } else {
2949 /* it's not consistent with somebody in the set..
2950 punt */
2951 return(0);
2952 }
2953 /* all was fine.. it must fit... */
2954 return(1);
2955 }
2956
/*
 * Decide whether the configuration set has enough live components to
 * be brought up.  "Live" means: present with the highest mod_counter
 * seen in the set.  RAID 1 is special-cased: components pair up
 * (even, odd), and only the loss of *both* members of a pair is fatal.
 * Returns 1 if the set is configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The freshest components carry the largest mod_counter; anything
	   older is considered failed. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (mod_counter matches) component
		   occupying column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no loss; RAID 4/5 tolerate one component. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3059
/*
 * Build an RF_Config_t for rf_Configure() from the component labels
 * of an autoconfig chain.  Geometry and policy come from the first
 * label; device names come from every member of the chain.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Record each member's device name in its column slot. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables for autoconfigured sets. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3094
3095 int
3096 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3097 {
3098 RF_ComponentLabel_t clabel;
3099 struct vnode *vp;
3100 dev_t dev;
3101 int column;
3102 int sparecol;
3103
3104 raidPtr->autoconfigure = new_value;
3105
3106 for(column=0; column<raidPtr->numCol; column++) {
3107 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3108 dev = raidPtr->Disks[column].dev;
3109 vp = raidPtr->raid_cinfo[column].ci_vp;
3110 raidread_component_label(dev, vp, &clabel);
3111 clabel.autoconfigure = new_value;
3112 raidwrite_component_label(dev, vp, &clabel);
3113 }
3114 }
3115 for(column = 0; column < raidPtr->numSpare ; column++) {
3116 sparecol = raidPtr->numCol + column;
3117 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3118 dev = raidPtr->Disks[sparecol].dev;
3119 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3120 raidread_component_label(dev, vp, &clabel);
3121 clabel.autoconfigure = new_value;
3122 raidwrite_component_label(dev, vp, &clabel);
3123 }
3124 }
3125 return(new_value);
3126 }
3127
3128 int
3129 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3130 {
3131 RF_ComponentLabel_t clabel;
3132 struct vnode *vp;
3133 dev_t dev;
3134 int column;
3135 int sparecol;
3136
3137 raidPtr->root_partition = new_value;
3138 for(column=0; column<raidPtr->numCol; column++) {
3139 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3140 dev = raidPtr->Disks[column].dev;
3141 vp = raidPtr->raid_cinfo[column].ci_vp;
3142 raidread_component_label(dev, vp, &clabel);
3143 clabel.root_partition = new_value;
3144 raidwrite_component_label(dev, vp, &clabel);
3145 }
3146 }
3147 for(column = 0; column < raidPtr->numSpare ; column++) {
3148 sparecol = raidPtr->numCol + column;
3149 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3150 dev = raidPtr->Disks[sparecol].dev;
3151 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3152 raidread_component_label(dev, vp, &clabel);
3153 clabel.root_partition = new_value;
3154 raidwrite_component_label(dev, vp, &clabel);
3155 }
3156 }
3157 return(new_value);
3158 }
3159
3160 void
3161 rf_release_all_vps(RF_ConfigSet_t *cset)
3162 {
3163 RF_AutoConfig_t *ac;
3164
3165 ac = cset->ac;
3166 while(ac!=NULL) {
3167 /* Close the vp, and give it back */
3168 if (ac->vp) {
3169 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3170 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3171 vput(ac->vp);
3172 ac->vp = NULL;
3173 }
3174 ac = ac->next;
3175 }
3176 }
3177
3178
3179 void
3180 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3181 {
3182 RF_AutoConfig_t *ac;
3183 RF_AutoConfig_t *next_ac;
3184
3185 ac = cset->ac;
3186 while(ac!=NULL) {
3187 next_ac = ac->next;
3188 /* nuke the label */
3189 free(ac->clabel, M_RAIDFRAME);
3190 /* cleanup the config structure */
3191 free(ac, M_RAIDFRAME);
3192 /* "next.." */
3193 ac = next_ac;
3194 }
3195 /* and, finally, nuke the config set */
3196 free(cset, M_RAIDFRAME);
3197 }
3198
3199
/*
 * Initialize a component label from the current in-core state of the
 * RAID set.  Per-component fields (row, column, partitionSize) are
 * NOT set here; the caller fills those in.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3227
/*
 * Bring up one configuration set: pick a raid unit (preferring the one
 * recorded in the labels), build an RF_Config_t, and configure it.
 * On success *unit holds the chosen raid unit.  Returns 0 on success,
 * nonzero on failure.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#if DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#if DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		/* Freshly configured: labels need a new mod_counter. */
		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3328
3329 void
3330 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3331 {
3332 struct buf *bp;
3333
3334 bp = (struct buf *)desc->bp;
3335 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3336 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3337 }
3338
/*
 * Initialize a pool for RAIDframe use: create it, cap it at xmax
 * items, and pre-allocate xmin items so they are available without
 * sleeping.  w_chan is the wait channel name shown by e.g. ps(1).
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	/* never cache more than xmax idle items */
	pool_sethiwat(p, xmax);
	/* pre-allocate xmin items now, and keep at least that many around */
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3348
3349 /*
3350 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3351 * if there is IO pending and if that IO could possibly be done for a
3352 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3353 * otherwise.
3354 *
3355 */
3356
3357 int
3358 rf_buf_queue_check(int raidid)
3359 {
3360 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3361 raidPtrs[raidid]->openings > 0) {
3362 /* there is work to do */
3363 return 0;
3364 }
3365 /* default is nothing to do */
3366 return 1;
3367 }
3368
3369 int
3370 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3371 {
3372 struct partinfo dpart;
3373 struct dkwedge_info dkw;
3374 int error;
3375
3376 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3377 if (error == 0) {
3378 diskPtr->blockSize = dpart.disklab->d_secsize;
3379 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3380 diskPtr->partitionSize = dpart.part->p_size;
3381 return 0;
3382 }
3383
3384 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3385 if (error == 0) {
3386 diskPtr->blockSize = 512; /* XXX */
3387 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3388 diskPtr->partitionSize = dkw.dkw_size;
3389 return 0;
3390 }
3391 return error;
3392 }
3393