rf_netbsdkintf.c revision 1.218 1 /* $NetBSD: rf_netbsdkintf.c,v 1.218 2006/10/08 22:57:51 christos Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.218 2006/10/08 22:57:51 christos Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
191 #ifdef DEBUG
192 int rf_kdebug_level = 0;
193 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
194 #else /* DEBUG */
195 #define db1_printf(a) { }
196 #endif /* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *, int);
215
216 void raidattach(int);
217 static int raid_match(struct device *, struct cfdata *, void *);
218 static void raid_attach(struct device *, struct device *, void *);
219 static int raid_detach(struct device *, int);
220
221 dev_type_open(raidopen);
222 dev_type_close(raidclose);
223 dev_type_read(raidread);
224 dev_type_write(raidwrite);
225 dev_type_ioctl(raidioctl);
226 dev_type_strategy(raidstrategy);
227 dev_type_dump(raiddump);
228 dev_type_size(raidsize);
229
230 const struct bdevsw raid_bdevsw = {
231 raidopen, raidclose, raidstrategy, raidioctl,
232 raiddump, raidsize, D_DISK
233 };
234
235 const struct cdevsw raid_cdevsw = {
236 raidopen, raidclose, raidread, raidwrite, raidioctl,
237 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
238 };
239
240 /* XXX Not sure if the following should be replacing the raidPtrs above,
241 or if it should be used in conjunction with that...
242 */
243
/* Per-unit state for the raid pseudo-device. */
struct raid_softc {
	struct device *sc_dev;	/* autoconf device node for this unit */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
253 /* sc_flags */
254 #define RAIDF_INITED 0x01 /* unit has been initialized */
255 #define RAIDF_WLABEL 0x02 /* label area is writable */
256 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
257 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
258 #define RAIDF_LOCKED 0x80 /* unit is locked */
259
260 #define raidunit(x) DISKUNIT(x)
261 int numraid = 0;
262
263 extern struct cfdriver raid_cd;
264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
265 raid_match, raid_attach, raid_detach, NULL);
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
295 struct disklabel *);
296 static void raidgetdisklabel(dev_t);
297 static void raidmakedisklabel(struct raid_softc *);
298
299 static int raidlock(struct raid_softc *);
300 static void raidunlock(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(struct device *self);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 static int rf_reasonable_label(RF_ComponentLabel_t *);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
322
323 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
324 allow autoconfig to take place.
325 Note that this is overridden by having
326 RAID_AUTOCONFIG as an option in the
327 kernel config file. */
328
329 struct RF_Pools_s rf_pools;
330
/*
 * raidattach: pseudo-device attach routine.
 *
 * Called once at boot with the number of raid units to support.
 * Allocates the global raidPtrs[] descriptor array and the per-unit
 * raid_softc array, boots the RAIDframe core, hooks the cfattach into
 * autoconf, and registers a finalizer that will auto-configure RAID
 * sets once all real hardware devices have been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Out of memory: truncate the usable unit count
			   to the units fully set up so far. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		printf("config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}
410
411 int
412 rf_autoconfig(struct device *self)
413 {
414 RF_AutoConfig_t *ac_list;
415 RF_ConfigSet_t *config_sets;
416 int i;
417
418 if (raidautoconfig == 0)
419 return (0);
420
421 /* XXX This code can only be run once. */
422 raidautoconfig = 0;
423
424 /* 1. locate all RAID components on the system */
425 #ifdef DEBUG
426 printf("Searching for RAID components...\n");
427 #endif
428 ac_list = rf_find_raid_components();
429
430 /* 2. Sort them into their respective sets. */
431 config_sets = rf_create_auto_sets(ac_list);
432
433 /*
434 * 3. Evaluate each set andconfigure the valid ones.
435 * This gets done in rf_buildroothack().
436 */
437 rf_buildroothack(config_sets);
438
439 for (i = 0; i < numraid; i++)
440 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
441 dkwedge_discover(&raid_softc[i].sc_dkdev);
442
443 return 1;
444 }
445
446 void
447 rf_buildroothack(RF_ConfigSet_t *config_sets)
448 {
449 RF_ConfigSet_t *cset;
450 RF_ConfigSet_t *next_cset;
451 int retcode;
452 int raidID;
453 int rootID;
454 int num_root;
455
456 rootID = 0;
457 num_root = 0;
458 cset = config_sets;
459 while(cset != NULL ) {
460 next_cset = cset->next;
461 if (rf_have_enough_components(cset) &&
462 cset->ac->clabel->autoconfigure==1) {
463 retcode = rf_auto_config_set(cset,&raidID);
464 if (!retcode) {
465 #ifdef DEBUG
466 printf("raid%d: configured ok\n", raidID);
467 #endif
468 if (cset->rootable) {
469 rootID = raidID;
470 num_root++;
471 }
472 } else {
473 /* The autoconfig didn't work :( */
474 #if DEBUG
475 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
476 #endif
477 rf_release_all_vps(cset);
478 }
479 } else {
480 #ifdef DEBUG
481 printf("raid%d: not enough components\n", raidID);
482 #endif
483 /* we're not autoconfiguring this set...
484 release the associated resources */
485 rf_release_all_vps(cset);
486 }
487 /* cleanup */
488 rf_cleanup_config_set(cset);
489 cset = next_cset;
490 }
491
492 /* we found something bootable... */
493
494 if (num_root == 1) {
495 booted_device = raid_softc[rootID].sc_dev;
496 } else if (num_root > 1) {
497 /* we can't guess.. require the user to answer... */
498 boothowto |= RB_ASKNAME;
499 }
500 }
501
502
503 int
504 raidsize(dev_t dev)
505 {
506 struct raid_softc *rs;
507 struct disklabel *lp;
508 int part, unit, omask, size;
509
510 unit = raidunit(dev);
511 if (unit >= numraid)
512 return (-1);
513 rs = &raid_softc[unit];
514
515 if ((rs->sc_flags & RAIDF_INITED) == 0)
516 return (-1);
517
518 part = DISKPART(dev);
519 omask = rs->sc_dkdev.dk_openmask & (1 << part);
520 lp = rs->sc_dkdev.dk_label;
521
522 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
523 return (-1);
524
525 if (lp->d_partitions[part].p_fstype != FS_SWAP)
526 size = -1;
527 else
528 size = lp->d_partitions[part].p_size *
529 (lp->d_secsize / DEV_BSIZE);
530
531 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
532 return (-1);
533
534 return (size);
535
536 }
537
538 int
539 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
540 {
541 /* Not implemented. */
542 return ENXIO;
543 }
544 /* ARGSUSED */
/*
 * Open a raid unit (block or character flavour).
 *
 * Serializes against concurrent (un)configuration via raidlock(),
 * refuses non-RAW_PART opens when wedges exist, (re)reads the
 * disklabel on the first open of a configured unit, verifies the
 * partition exists, and records the open in the per-format openmask
 * so the unit cannot be unconfigured while open.  On the very first
 * open of a configured set, marks all components dirty so an unclean
 * shutdown can be detected later.
 *
 * NOTE: the "bad" label is reached on the success path too; `error'
 * is 0 in that case.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Exclude concurrent configuration changes on this unit. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
623 /* ARGSUSED */
/*
 * Close a raid unit (block or character flavour).
 *
 * Clears the partition bit from the per-format openmask.  On the
 * last close of a configured unit, writes final component labels;
 * if the system is shutting down, also shuts the RAID set down and
 * detaches the pseudo-device and its disk.
 *
 * NOTE(review): the rf_Shutdown()/config_detach() results stored in
 * `error' are discarded — this function always returns 0.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct cfdata *cf;
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Exclude concurrent configuration changes on this unit. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* detach the device */

			cf = device_cfdata(rs->sc_dev);
			error = config_detach(rs->sc_dev, DETACH_QUIET);
			free(cf, M_RAIDFRAME);

			/* Detach the disk. */
			pseudo_disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}
687
688 void
689 raidstrategy(struct buf *bp)
690 {
691 int s;
692
693 unsigned int raidID = raidunit(bp->b_dev);
694 RF_Raid_t *raidPtr;
695 struct raid_softc *rs = &raid_softc[raidID];
696 int wlabel;
697
698 if ((rs->sc_flags & RAIDF_INITED) ==0) {
699 bp->b_error = ENXIO;
700 bp->b_flags |= B_ERROR;
701 goto done;
702 }
703 if (raidID >= numraid || !raidPtrs[raidID]) {
704 bp->b_error = ENODEV;
705 bp->b_flags |= B_ERROR;
706 goto done;
707 }
708 raidPtr = raidPtrs[raidID];
709 if (!raidPtr->valid) {
710 bp->b_error = ENODEV;
711 bp->b_flags |= B_ERROR;
712 goto done;
713 }
714 if (bp->b_bcount == 0) {
715 db1_printf(("b_bcount is zero..\n"));
716 goto done;
717 }
718
719 /*
720 * Do bounds checking and adjust transfer. If there's an
721 * error, the bounds check will flag that for us.
722 */
723
724 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
725 if (DISKPART(bp->b_dev) == RAW_PART) {
726 uint64_t size; /* device size in DEV_BSIZE unit */
727
728 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
729 size = raidPtr->totalSectors <<
730 (raidPtr->logBytesPerSector - DEV_BSHIFT);
731 } else {
732 size = raidPtr->totalSectors >>
733 (DEV_BSHIFT - raidPtr->logBytesPerSector);
734 }
735 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
736 goto done;
737 }
738 } else {
739 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
740 db1_printf(("Bounds check failed!!:%d %d\n",
741 (int) bp->b_blkno, (int) wlabel));
742 goto done;
743 }
744 }
745 s = splbio();
746
747 bp->b_resid = 0;
748
749 /* stuff it onto our queue */
750 BUFQ_PUT(rs->buf_queue, bp);
751
752 /* scheduled the IO to happen at the next convenient time */
753 wakeup(&(raidPtrs[raidID]->iodone));
754
755 splx(s);
756 return;
757
758 done:
759 bp->b_resid = bp->b_bcount;
760 biodone(bp);
761 }
762 /* ARGSUSED */
763 int
764 raidread(dev_t dev, struct uio *uio, int flags)
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775
776 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
777
778 }
779 /* ARGSUSED */
780 int
781 raidwrite(dev_t dev, struct uio *uio, int flags)
782 {
783 int unit = raidunit(dev);
784 struct raid_softc *rs;
785
786 if (unit >= numraid)
787 return (ENXIO);
788 rs = &raid_softc[unit];
789
790 if ((rs->sc_flags & RAIDF_INITED) == 0)
791 return (ENXIO);
792
793 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
794
795 }
796
797 int
798 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
799 {
800 int unit = raidunit(dev);
801 int error = 0;
802 int part, pmask;
803 struct cfdata *cf;
804 struct raid_softc *rs;
805 RF_Config_t *k_cfg, *u_cfg;
806 RF_Raid_t *raidPtr;
807 RF_RaidDisk_t *diskPtr;
808 RF_AccTotals_t *totals;
809 RF_DeviceConfig_t *d_cfg, **ucfgp;
810 u_char *specific_buf;
811 int retcode = 0;
812 int column;
813 int raidid;
814 struct rf_recon_req *rrcopy, *rr;
815 RF_ComponentLabel_t *clabel;
816 RF_ComponentLabel_t *ci_label;
817 RF_ComponentLabel_t **clabel_ptr;
818 RF_SingleComponent_t *sparePtr,*componentPtr;
819 RF_SingleComponent_t component;
820 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
821 int i, j, d;
822 #ifdef __HAVE_OLD_DISKLABEL
823 struct disklabel newlabel;
824 #endif
825 struct dkwedge_info *dkw;
826
827 if (unit >= numraid)
828 return (ENXIO);
829 rs = &raid_softc[unit];
830 raidPtr = raidPtrs[unit];
831
832 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
833 (int) DISKPART(dev), (int) unit, (int) cmd));
834
835 /* Must be open for writes for these commands... */
836 switch (cmd) {
837 #ifdef DIOCGSECTORSIZE
838 case DIOCGSECTORSIZE:
839 *(u_int *)data = raidPtr->bytesPerSector;
840 return 0;
841 case DIOCGMEDIASIZE:
842 *(off_t *)data =
843 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
844 return 0;
845 #endif
846 case DIOCSDINFO:
847 case DIOCWDINFO:
848 #ifdef __HAVE_OLD_DISKLABEL
849 case ODIOCWDINFO:
850 case ODIOCSDINFO:
851 #endif
852 case DIOCWLABEL:
853 case DIOCAWEDGE:
854 case DIOCDWEDGE:
855 if ((flag & FWRITE) == 0)
856 return (EBADF);
857 }
858
859 /* Must be initialized for these... */
860 switch (cmd) {
861 case DIOCGDINFO:
862 case DIOCSDINFO:
863 case DIOCWDINFO:
864 #ifdef __HAVE_OLD_DISKLABEL
865 case ODIOCGDINFO:
866 case ODIOCWDINFO:
867 case ODIOCSDINFO:
868 case ODIOCGDEFLABEL:
869 #endif
870 case DIOCGPART:
871 case DIOCWLABEL:
872 case DIOCGDEFLABEL:
873 case DIOCAWEDGE:
874 case DIOCDWEDGE:
875 case DIOCLWEDGES:
876 case RAIDFRAME_SHUTDOWN:
877 case RAIDFRAME_REWRITEPARITY:
878 case RAIDFRAME_GET_INFO:
879 case RAIDFRAME_RESET_ACCTOTALS:
880 case RAIDFRAME_GET_ACCTOTALS:
881 case RAIDFRAME_KEEP_ACCTOTALS:
882 case RAIDFRAME_GET_SIZE:
883 case RAIDFRAME_FAIL_DISK:
884 case RAIDFRAME_COPYBACK:
885 case RAIDFRAME_CHECK_RECON_STATUS:
886 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
887 case RAIDFRAME_GET_COMPONENT_LABEL:
888 case RAIDFRAME_SET_COMPONENT_LABEL:
889 case RAIDFRAME_ADD_HOT_SPARE:
890 case RAIDFRAME_REMOVE_HOT_SPARE:
891 case RAIDFRAME_INIT_LABELS:
892 case RAIDFRAME_REBUILD_IN_PLACE:
893 case RAIDFRAME_CHECK_PARITY:
894 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
895 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
896 case RAIDFRAME_CHECK_COPYBACK_STATUS:
897 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
898 case RAIDFRAME_SET_AUTOCONFIG:
899 case RAIDFRAME_SET_ROOT:
900 case RAIDFRAME_DELETE_COMPONENT:
901 case RAIDFRAME_INCORPORATE_HOT_SPARE:
902 if ((rs->sc_flags & RAIDF_INITED) == 0)
903 return (ENXIO);
904 }
905
906 switch (cmd) {
907
908 /* configure the system */
909 case RAIDFRAME_CONFIGURE:
910
911 if (raidPtr->valid) {
912 /* There is a valid RAID set running on this unit! */
913 printf("raid%d: Device already configured!\n",unit);
914 return(EINVAL);
915 }
916
917 /* copy-in the configuration information */
918 /* data points to a pointer to the configuration structure */
919
920 u_cfg = *((RF_Config_t **) data);
921 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
922 if (k_cfg == NULL) {
923 return (ENOMEM);
924 }
925 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
926 if (retcode) {
927 RF_Free(k_cfg, sizeof(RF_Config_t));
928 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
929 retcode));
930 return (retcode);
931 }
932 /* allocate a buffer for the layout-specific data, and copy it
933 * in */
934 if (k_cfg->layoutSpecificSize) {
935 if (k_cfg->layoutSpecificSize > 10000) {
936 /* sanity check */
937 RF_Free(k_cfg, sizeof(RF_Config_t));
938 return (EINVAL);
939 }
940 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
941 (u_char *));
942 if (specific_buf == NULL) {
943 RF_Free(k_cfg, sizeof(RF_Config_t));
944 return (ENOMEM);
945 }
946 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
947 k_cfg->layoutSpecificSize);
948 if (retcode) {
949 RF_Free(k_cfg, sizeof(RF_Config_t));
950 RF_Free(specific_buf,
951 k_cfg->layoutSpecificSize);
952 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
953 retcode));
954 return (retcode);
955 }
956 } else
957 specific_buf = NULL;
958 k_cfg->layoutSpecific = specific_buf;
959
960 /* should do some kind of sanity check on the configuration.
961 * Store the sum of all the bytes in the last byte? */
962
963 /* configure the system */
964
965 /*
966 * Clear the entire RAID descriptor, just to make sure
967 * there is no stale data left in the case of a
968 * reconfiguration
969 */
970 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
971 raidPtr->raidid = unit;
972
973 retcode = rf_Configure(raidPtr, k_cfg, NULL);
974
975 if (retcode == 0) {
976
977 /* allow this many simultaneous IO's to
978 this RAID device */
979 raidPtr->openings = RAIDOUTSTANDING;
980
981 raidinit(raidPtr, 0);
982 rf_markalldirty(raidPtr);
983 }
984 /* free the buffers. No return code here. */
985 if (k_cfg->layoutSpecificSize) {
986 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
987 }
988 RF_Free(k_cfg, sizeof(RF_Config_t));
989
990 return (retcode);
991
992 /* shutdown the system */
993 case RAIDFRAME_SHUTDOWN:
994
995 if ((error = raidlock(rs)) != 0)
996 return (error);
997
998 /*
999 * If somebody has a partition mounted, we shouldn't
1000 * shutdown.
1001 */
1002
1003 part = DISKPART(dev);
1004 pmask = (1 << part);
1005 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1006 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1007 (rs->sc_dkdev.dk_copenmask & pmask))) {
1008 raidunlock(rs);
1009 return (EBUSY);
1010 }
1011
1012 retcode = rf_Shutdown(raidPtr);
1013
1014 /* It's no longer initialized... */
1015 rs->sc_flags &= ~RAIDF_INITED;
1016
1017 /* free the pseudo device attach bits */
1018
1019 cf = device_cfdata(rs->sc_dev);
1020 /* XXX this causes us to not return any errors
1021 from the above call to rf_Shutdown() */
1022 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1023 free(cf, M_RAIDFRAME);
1024
1025 /* Detach the disk. */
1026 pseudo_disk_detach(&rs->sc_dkdev);
1027
1028 raidunlock(rs);
1029
1030 return (retcode);
1031 case RAIDFRAME_GET_COMPONENT_LABEL:
1032 clabel_ptr = (RF_ComponentLabel_t **) data;
1033 /* need to read the component label for the disk indicated
1034 by row,column in clabel */
1035
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
1038 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1039 (RF_ComponentLabel_t *));
1040 if (clabel == NULL)
1041 return (ENOMEM);
1042
1043 retcode = copyin( *clabel_ptr, clabel,
1044 sizeof(RF_ComponentLabel_t));
1045
1046 if (retcode) {
1047 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1048 return(retcode);
1049 }
1050
1051 clabel->row = 0; /* Don't allow looking at anything else.*/
1052
1053 column = clabel->column;
1054
1055 if ((column < 0) || (column >= raidPtr->numCol +
1056 raidPtr->numSpare)) {
1057 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1058 return(EINVAL);
1059 }
1060
1061 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1062 raidPtr->raid_cinfo[column].ci_vp,
1063 clabel );
1064
1065 if (retcode == 0) {
1066 retcode = copyout(clabel, *clabel_ptr,
1067 sizeof(RF_ComponentLabel_t));
1068 }
1069 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1070 return (retcode);
1071
1072 case RAIDFRAME_SET_COMPONENT_LABEL:
1073 clabel = (RF_ComponentLabel_t *) data;
1074
1075 /* XXX check the label for valid stuff... */
1076 /* Note that some things *should not* get modified --
1077 the user should be re-initing the labels instead of
1078 trying to patch things.
1079 */
1080
1081 raidid = raidPtr->raidid;
1082 #if DEBUG
1083 printf("raid%d: Got component label:\n", raidid);
1084 printf("raid%d: Version: %d\n", raidid, clabel->version);
1085 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1086 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1087 printf("raid%d: Column: %d\n", raidid, clabel->column);
1088 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1089 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1090 printf("raid%d: Status: %d\n", raidid, clabel->status);
1091 #endif
1092 clabel->row = 0;
1093 column = clabel->column;
1094
1095 if ((column < 0) || (column >= raidPtr->numCol)) {
1096 return(EINVAL);
1097 }
1098
1099 /* XXX this isn't allowed to do anything for now :-) */
1100
1101 /* XXX and before it is, we need to fill in the rest
1102 of the fields!?!?!?! */
1103 #if 0
1104 raidwrite_component_label(
1105 raidPtr->Disks[column].dev,
1106 raidPtr->raid_cinfo[column].ci_vp,
1107 clabel );
1108 #endif
1109 return (0);
1110
1111 case RAIDFRAME_INIT_LABELS:
1112 clabel = (RF_ComponentLabel_t *) data;
1113 /*
1114 we only want the serial number from
1115 the above. We get all the rest of the information
1116 from the config that was used to create this RAID
1117 set.
1118 */
1119
1120 raidPtr->serial_number = clabel->serial_number;
1121
1122 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1123 (RF_ComponentLabel_t *));
1124 if (ci_label == NULL)
1125 return (ENOMEM);
1126
1127 raid_init_component_label(raidPtr, ci_label);
1128 ci_label->serial_number = clabel->serial_number;
1129 ci_label->row = 0; /* we dont' pretend to support more */
1130
1131 for(column=0;column<raidPtr->numCol;column++) {
1132 diskPtr = &raidPtr->Disks[column];
1133 if (!RF_DEAD_DISK(diskPtr->status)) {
1134 ci_label->partitionSize = diskPtr->partitionSize;
1135 ci_label->column = column;
1136 raidwrite_component_label(
1137 raidPtr->Disks[column].dev,
1138 raidPtr->raid_cinfo[column].ci_vp,
1139 ci_label );
1140 }
1141 }
1142 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1143
1144 return (retcode);
1145 case RAIDFRAME_SET_AUTOCONFIG:
1146 d = rf_set_autoconfig(raidPtr, *(int *) data);
1147 printf("raid%d: New autoconfig value is: %d\n",
1148 raidPtr->raidid, d);
1149 *(int *) data = d;
1150 return (retcode);
1151
1152 case RAIDFRAME_SET_ROOT:
1153 d = rf_set_rootpartition(raidPtr, *(int *) data);
1154 printf("raid%d: New rootpartition value is: %d\n",
1155 raidPtr->raidid, d);
1156 *(int *) data = d;
1157 return (retcode);
1158
1159 /* initialize all parity */
1160 case RAIDFRAME_REWRITEPARITY:
1161
1162 if (raidPtr->Layout.map->faultsTolerated == 0) {
1163 /* Parity for RAID 0 is trivially correct */
1164 raidPtr->parity_good = RF_RAID_CLEAN;
1165 return(0);
1166 }
1167
1168 if (raidPtr->parity_rewrite_in_progress == 1) {
1169 /* Re-write is already in progress! */
1170 return(EINVAL);
1171 }
1172
1173 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1174 rf_RewriteParityThread,
1175 raidPtr,"raid_parity");
1176 return (retcode);
1177
1178
1179 case RAIDFRAME_ADD_HOT_SPARE:
1180 sparePtr = (RF_SingleComponent_t *) data;
1181 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1182 retcode = rf_add_hot_spare(raidPtr, &component);
1183 return(retcode);
1184
1185 case RAIDFRAME_REMOVE_HOT_SPARE:
1186 return(retcode);
1187
1188 case RAIDFRAME_DELETE_COMPONENT:
1189 componentPtr = (RF_SingleComponent_t *)data;
1190 memcpy( &component, componentPtr,
1191 sizeof(RF_SingleComponent_t));
1192 retcode = rf_delete_component(raidPtr, &component);
1193 return(retcode);
1194
1195 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1196 componentPtr = (RF_SingleComponent_t *)data;
1197 memcpy( &component, componentPtr,
1198 sizeof(RF_SingleComponent_t));
1199 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1200 return(retcode);
1201
1202 case RAIDFRAME_REBUILD_IN_PLACE:
1203
1204 if (raidPtr->Layout.map->faultsTolerated == 0) {
1205 /* Can't do this on a RAID 0!! */
1206 return(EINVAL);
1207 }
1208
1209 if (raidPtr->recon_in_progress == 1) {
1210 /* a reconstruct is already in progress! */
1211 return(EINVAL);
1212 }
1213
1214 componentPtr = (RF_SingleComponent_t *) data;
1215 memcpy( &component, componentPtr,
1216 sizeof(RF_SingleComponent_t));
1217 component.row = 0; /* we don't support any more */
1218 column = component.column;
1219
1220 if ((column < 0) || (column >= raidPtr->numCol)) {
1221 return(EINVAL);
1222 }
1223
1224 RF_LOCK_MUTEX(raidPtr->mutex);
1225 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1226 (raidPtr->numFailures > 0)) {
1227 /* XXX 0 above shouldn't be constant!!! */
1228 /* some component other than this has failed.
1229 Let's not make things worse than they already
1230 are... */
1231 printf("raid%d: Unable to reconstruct to disk at:\n",
1232 raidPtr->raidid);
1233 printf("raid%d: Col: %d Too many failures.\n",
1234 raidPtr->raidid, column);
1235 RF_UNLOCK_MUTEX(raidPtr->mutex);
1236 return (EINVAL);
1237 }
1238 if (raidPtr->Disks[column].status ==
1239 rf_ds_reconstructing) {
1240 printf("raid%d: Unable to reconstruct to disk at:\n",
1241 raidPtr->raidid);
1242 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1243
1244 RF_UNLOCK_MUTEX(raidPtr->mutex);
1245 return (EINVAL);
1246 }
1247 if (raidPtr->Disks[column].status == rf_ds_spared) {
1248 RF_UNLOCK_MUTEX(raidPtr->mutex);
1249 return (EINVAL);
1250 }
1251 RF_UNLOCK_MUTEX(raidPtr->mutex);
1252
1253 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1254 if (rrcopy == NULL)
1255 return(ENOMEM);
1256
1257 rrcopy->raidPtr = (void *) raidPtr;
1258 rrcopy->col = column;
1259
1260 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1261 rf_ReconstructInPlaceThread,
1262 rrcopy,"raid_reconip");
1263 return(retcode);
1264
1265 case RAIDFRAME_GET_INFO:
1266 if (!raidPtr->valid)
1267 return (ENODEV);
1268 ucfgp = (RF_DeviceConfig_t **) data;
1269 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1270 (RF_DeviceConfig_t *));
1271 if (d_cfg == NULL)
1272 return (ENOMEM);
1273 d_cfg->rows = 1; /* there is only 1 row now */
1274 d_cfg->cols = raidPtr->numCol;
1275 d_cfg->ndevs = raidPtr->numCol;
1276 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1277 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1278 return (ENOMEM);
1279 }
1280 d_cfg->nspares = raidPtr->numSpare;
1281 if (d_cfg->nspares >= RF_MAX_DISKS) {
1282 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1283 return (ENOMEM);
1284 }
1285 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1286 d = 0;
1287 for (j = 0; j < d_cfg->cols; j++) {
1288 d_cfg->devs[d] = raidPtr->Disks[j];
1289 d++;
1290 }
1291 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1292 d_cfg->spares[i] = raidPtr->Disks[j];
1293 }
1294 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1295 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1296
1297 return (retcode);
1298
1299 case RAIDFRAME_CHECK_PARITY:
1300 *(int *) data = raidPtr->parity_good;
1301 return (0);
1302
1303 case RAIDFRAME_RESET_ACCTOTALS:
1304 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1305 return (0);
1306
1307 case RAIDFRAME_GET_ACCTOTALS:
1308 totals = (RF_AccTotals_t *) data;
1309 *totals = raidPtr->acc_totals;
1310 return (0);
1311
1312 case RAIDFRAME_KEEP_ACCTOTALS:
1313 raidPtr->keep_acc_totals = *(int *)data;
1314 return (0);
1315
1316 case RAIDFRAME_GET_SIZE:
1317 *(int *) data = raidPtr->totalSectors;
1318 return (0);
1319
1320 /* fail a disk & optionally start reconstruction */
1321 case RAIDFRAME_FAIL_DISK:
1322
1323 if (raidPtr->Layout.map->faultsTolerated == 0) {
1324 /* Can't do this on a RAID 0!! */
1325 return(EINVAL);
1326 }
1327
1328 rr = (struct rf_recon_req *) data;
1329 rr->row = 0;
1330 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1331 return (EINVAL);
1332
1333
1334 RF_LOCK_MUTEX(raidPtr->mutex);
1335 if (raidPtr->status == rf_rs_reconstructing) {
1336 /* you can't fail a disk while we're reconstructing! */
1337 /* XXX wrong for RAID6 */
1338 RF_UNLOCK_MUTEX(raidPtr->mutex);
1339 return (EINVAL);
1340 }
1341 if ((raidPtr->Disks[rr->col].status ==
1342 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1343 /* some other component has failed. Let's not make
1344 things worse. XXX wrong for RAID6 */
1345 RF_UNLOCK_MUTEX(raidPtr->mutex);
1346 return (EINVAL);
1347 }
1348 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1349 /* Can't fail a spared disk! */
1350 RF_UNLOCK_MUTEX(raidPtr->mutex);
1351 return (EINVAL);
1352 }
1353 RF_UNLOCK_MUTEX(raidPtr->mutex);
1354
1355 /* make a copy of the recon request so that we don't rely on
1356 * the user's buffer */
1357 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1358 if (rrcopy == NULL)
1359 return(ENOMEM);
1360 memcpy(rrcopy, rr, sizeof(*rr));
1361 rrcopy->raidPtr = (void *) raidPtr;
1362
1363 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1364 rf_ReconThread,
1365 rrcopy,"raid_recon");
1366 return (0);
1367
1368 /* invoke a copyback operation after recon on whatever disk
1369 * needs it, if any */
1370 case RAIDFRAME_COPYBACK:
1371
1372 if (raidPtr->Layout.map->faultsTolerated == 0) {
1373 /* This makes no sense on a RAID 0!! */
1374 return(EINVAL);
1375 }
1376
1377 if (raidPtr->copyback_in_progress == 1) {
1378 /* Copyback is already in progress! */
1379 return(EINVAL);
1380 }
1381
1382 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1383 rf_CopybackThread,
1384 raidPtr,"raid_copyback");
1385 return (retcode);
1386
1387 /* return the percentage completion of reconstruction */
1388 case RAIDFRAME_CHECK_RECON_STATUS:
1389 if (raidPtr->Layout.map->faultsTolerated == 0) {
1390 /* This makes no sense on a RAID 0, so tell the
1391 user it's done. */
1392 *(int *) data = 100;
1393 return(0);
1394 }
1395 if (raidPtr->status != rf_rs_reconstructing)
1396 *(int *) data = 100;
1397 else {
1398 if (raidPtr->reconControl->numRUsTotal > 0) {
1399 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1400 } else {
1401 *(int *) data = 0;
1402 }
1403 }
1404 return (0);
1405 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1406 progressInfoPtr = (RF_ProgressInfo_t **) data;
1407 if (raidPtr->status != rf_rs_reconstructing) {
1408 progressInfo.remaining = 0;
1409 progressInfo.completed = 100;
1410 progressInfo.total = 100;
1411 } else {
1412 progressInfo.total =
1413 raidPtr->reconControl->numRUsTotal;
1414 progressInfo.completed =
1415 raidPtr->reconControl->numRUsComplete;
1416 progressInfo.remaining = progressInfo.total -
1417 progressInfo.completed;
1418 }
1419 retcode = copyout(&progressInfo, *progressInfoPtr,
1420 sizeof(RF_ProgressInfo_t));
1421 return (retcode);
1422
1423 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1424 if (raidPtr->Layout.map->faultsTolerated == 0) {
1425 /* This makes no sense on a RAID 0, so tell the
1426 user it's done. */
1427 *(int *) data = 100;
1428 return(0);
1429 }
1430 if (raidPtr->parity_rewrite_in_progress == 1) {
1431 *(int *) data = 100 *
1432 raidPtr->parity_rewrite_stripes_done /
1433 raidPtr->Layout.numStripe;
1434 } else {
1435 *(int *) data = 100;
1436 }
1437 return (0);
1438
1439 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1440 progressInfoPtr = (RF_ProgressInfo_t **) data;
1441 if (raidPtr->parity_rewrite_in_progress == 1) {
1442 progressInfo.total = raidPtr->Layout.numStripe;
1443 progressInfo.completed =
1444 raidPtr->parity_rewrite_stripes_done;
1445 progressInfo.remaining = progressInfo.total -
1446 progressInfo.completed;
1447 } else {
1448 progressInfo.remaining = 0;
1449 progressInfo.completed = 100;
1450 progressInfo.total = 100;
1451 }
1452 retcode = copyout(&progressInfo, *progressInfoPtr,
1453 sizeof(RF_ProgressInfo_t));
1454 return (retcode);
1455
1456 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1457 if (raidPtr->Layout.map->faultsTolerated == 0) {
1458 /* This makes no sense on a RAID 0 */
1459 *(int *) data = 100;
1460 return(0);
1461 }
1462 if (raidPtr->copyback_in_progress == 1) {
1463 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1464 raidPtr->Layout.numStripe;
1465 } else {
1466 *(int *) data = 100;
1467 }
1468 return (0);
1469
1470 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1471 progressInfoPtr = (RF_ProgressInfo_t **) data;
1472 if (raidPtr->copyback_in_progress == 1) {
1473 progressInfo.total = raidPtr->Layout.numStripe;
1474 progressInfo.completed =
1475 raidPtr->copyback_stripes_done;
1476 progressInfo.remaining = progressInfo.total -
1477 progressInfo.completed;
1478 } else {
1479 progressInfo.remaining = 0;
1480 progressInfo.completed = 100;
1481 progressInfo.total = 100;
1482 }
1483 retcode = copyout(&progressInfo, *progressInfoPtr,
1484 sizeof(RF_ProgressInfo_t));
1485 return (retcode);
1486
1487 /* the sparetable daemon calls this to wait for the kernel to
1488 * need a spare table. this ioctl does not return until a
1489 * spare table is needed. XXX -- calling mpsleep here in the
1490 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1491 * -- I should either compute the spare table in the kernel,
1492 * or have a different -- XXX XXX -- interface (a different
1493 * character device) for delivering the table -- XXX */
1494 #if 0
1495 case RAIDFRAME_SPARET_WAIT:
1496 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1497 while (!rf_sparet_wait_queue)
1498 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1499 waitreq = rf_sparet_wait_queue;
1500 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1501 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1502
1503 /* structure assignment */
1504 *((RF_SparetWait_t *) data) = *waitreq;
1505
1506 RF_Free(waitreq, sizeof(*waitreq));
1507 return (0);
1508
		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
1511 case RAIDFRAME_ABORT_SPARET_WAIT:
1512 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1513 waitreq->fcol = -1;
1514 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1515 waitreq->next = rf_sparet_wait_queue;
1516 rf_sparet_wait_queue = waitreq;
1517 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1518 wakeup(&rf_sparet_wait_queue);
1519 return (0);
1520
1521 /* used by the spare table daemon to deliver a spare table
1522 * into the kernel */
1523 case RAIDFRAME_SEND_SPARET:
1524
1525 /* install the spare table */
1526 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1527
1528 /* respond to the requestor. the return status of the spare
1529 * table installation is passed in the "fcol" field */
1530 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1531 waitreq->fcol = retcode;
1532 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1533 waitreq->next = rf_sparet_resp_queue;
1534 rf_sparet_resp_queue = waitreq;
1535 wakeup(&rf_sparet_resp_queue);
1536 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1537
1538 return (retcode);
1539 #endif
1540
1541 default:
1542 break; /* fall through to the os-specific code below */
1543
1544 }
1545
1546 if (!raidPtr->valid)
1547 return (EINVAL);
1548
1549 /*
1550 * Add support for "regular" device ioctls here.
1551 */
1552
1553 switch (cmd) {
1554 case DIOCGDINFO:
1555 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1556 break;
1557 #ifdef __HAVE_OLD_DISKLABEL
1558 case ODIOCGDINFO:
1559 newlabel = *(rs->sc_dkdev.dk_label);
1560 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1561 return ENOTTY;
1562 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1563 break;
1564 #endif
1565
1566 case DIOCGPART:
1567 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1568 ((struct partinfo *) data)->part =
1569 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1570 break;
1571
1572 case DIOCWDINFO:
1573 case DIOCSDINFO:
1574 #ifdef __HAVE_OLD_DISKLABEL
1575 case ODIOCWDINFO:
1576 case ODIOCSDINFO:
1577 #endif
1578 {
1579 struct disklabel *lp;
1580 #ifdef __HAVE_OLD_DISKLABEL
1581 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1582 memset(&newlabel, 0, sizeof newlabel);
1583 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1584 lp = &newlabel;
1585 } else
1586 #endif
1587 lp = (struct disklabel *)data;
1588
1589 if ((error = raidlock(rs)) != 0)
1590 return (error);
1591
1592 rs->sc_flags |= RAIDF_LABELLING;
1593
1594 error = setdisklabel(rs->sc_dkdev.dk_label,
1595 lp, 0, rs->sc_dkdev.dk_cpulabel);
1596 if (error == 0) {
1597 if (cmd == DIOCWDINFO
1598 #ifdef __HAVE_OLD_DISKLABEL
1599 || cmd == ODIOCWDINFO
1600 #endif
1601 )
1602 error = writedisklabel(RAIDLABELDEV(dev),
1603 raidstrategy, rs->sc_dkdev.dk_label,
1604 rs->sc_dkdev.dk_cpulabel);
1605 }
1606 rs->sc_flags &= ~RAIDF_LABELLING;
1607
1608 raidunlock(rs);
1609
1610 if (error)
1611 return (error);
1612 break;
1613 }
1614
1615 case DIOCWLABEL:
1616 if (*(int *) data != 0)
1617 rs->sc_flags |= RAIDF_WLABEL;
1618 else
1619 rs->sc_flags &= ~RAIDF_WLABEL;
1620 break;
1621
1622 case DIOCGDEFLABEL:
1623 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1624 break;
1625
1626 #ifdef __HAVE_OLD_DISKLABEL
1627 case ODIOCGDEFLABEL:
1628 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1629 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1630 return ENOTTY;
1631 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1632 break;
1633 #endif
1634
1635 case DIOCAWEDGE:
1636 case DIOCDWEDGE:
1637 dkw = (void *)data;
1638
1639 /* If the ioctl happens here, the parent is us. */
1640 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1641 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1642
1643 case DIOCLWEDGES:
1644 return dkwedge_list(&rs->sc_dkdev,
1645 (struct dkwedge_list *)data, l);
1646
1647 default:
1648 retcode = ENOTTY;
1649 }
1650 return (retcode);
1651
1652 }
1653
1654
1655 /* raidinit -- complete the rest of the initialization for the
1656 RAIDframe device. */
1657
1658
1659 static void
1660 raidinit(RF_Raid_t *raidPtr, int autoconf)
1661 {
1662 struct cfdata *cf;
1663 struct raid_softc *rs;
1664 int unit;
1665
1666 unit = raidPtr->raidid;
1667
1668 rs = &raid_softc[unit];
1669
1670 /* XXX should check return code first... */
1671 rs->sc_flags |= RAIDF_INITED;
1672
1673 /* XXX doesn't check bounds. */
1674 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1675
1676 rs->sc_dkdev.dk_name = rs->sc_xname;
1677
1678 /* attach the pseudo device */
1679 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1680 cf->cf_name = raid_cd.cd_name;
1681 cf->cf_atname = raid_cd.cd_name;
1682 cf->cf_unit = unit;
1683 cf->cf_fstate = FSTATE_STAR;
1684
1685 rs->sc_dev = config_attach_pseudo(cf);
1686
1687 if (rs->sc_dev==NULL) {
1688 printf("raid%d: config_attach_pseudo failed\n",
1689 raidPtr->raidid);
1690 }
1691
1692 /* disk_attach actually creates space for the CPU disklabel, among
1693 * other things, so it's critical to call this *BEFORE* we try putzing
1694 * with disklabels. */
1695 if (autoconf)
1696 pseudo_disk_init(&rs->sc_dkdev);
1697
1698 pseudo_disk_attach(&rs->sc_dkdev);
1699
1700 /* XXX There may be a weird interaction here between this, and
1701 * protectedSectors, as used in RAIDframe. */
1702
1703 rs->sc_size = raidPtr->totalSectors;
1704 }
1705 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
/*
 * Hand a spare-table request to the user-space daemon and block until
 * the daemon delivers a response.
 *
 * req: the request to enqueue; ownership passes to the daemon side.
 * Returns the "fcol" status code the daemon placed in its response.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Publish the request and wake any daemon blocked in SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): the original comment said "mpsleep unlocks the
	 * mutex", but this code now calls tsleep(), which does not touch
	 * rf_sparet_wait_mutex -- presumably the mutex is held across the
	 * sleep; confirm this is safe with the daemon's locking.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	/* Dequeue the daemon's response (a different RF_SparetWait_t). */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The installation status is passed back in the fcol field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1739 #endif
1740
1741 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1742 * bp & passes it down.
1743 * any calls originating in the kernel must use non-blocking I/O
1744 * do some extra sanity checking to return "appropriate" error values for
1745 * certain conditions (to make some standard utilities work)
1746 *
1747 * Formerly known as: rf_DoAccessKernel
1748 */
/*
 * Drain the per-unit buffer queue, launching one non-blocking RAIDframe
 * access per buf until either the queue is empty or the unit runs out of
 * "openings" (concurrent-I/O slots).  Invalid requests are completed
 * immediately with an error via biodone().
 *
 * Locking: raidPtr->mutex protects openings/numNewFailures; it is held
 * at the loop condition and released while each buf is processed.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the lock: label update does its own I/O. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			/* Add the partition offset; RAW_PART starts at 0. */
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb accounts for a trailing partial sector, if any. */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this debug printf on always. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject I/O past the end; the "sum <" terms catch wrap. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not sector-multiples. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one concurrency slot for this access. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* DoAccess refused the request: fail the buf here. */
			bp->b_error = rc;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1872
1873
1874
1875
1876 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1877
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */

/*
 * Dispatch one RAIDframe disk-queue request to the underlying component:
 * NOPs are completed immediately via KernelWakeupFunc(); reads/writes are
 * packaged into the request's buf and sent down with VOP_STRATEGY().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* Remember the owning queue so the completion handler can find it. */
	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		       numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): double parens are harmless here (the inner
		 * parens just group the string) -- presumably this was meant
		 * to be db1_printf(("...")); confirm before changing. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* Start the disk-wait timer for access tracing. */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Fill in the buf for this transfer; KernelWakeupFunc is
		 * the biodone callback. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		/* Hand the buf to the component device. */
		VOP_STRATEGY(bp->b_vp, bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1952 /* this is the callback function associated with a I/O invoked from
1953 kernel code.
1954 */
/*
 * biodone callback for component I/O issued by rf_DispatchKernelIO().
 * Records tracing info, marks the component failed on I/O error (if that
 * would not break the set), stores the error in the request, and moves
 * the request to the raid set's "iodone" queue for the raidio thread.
 * Runs at splbio.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* The originating request was stashed in b_private by InitBP(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	/* Stop the disk-wait timer and fold the elapsed time into the
	 * access-trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update the next
			 * time raidstart() runs. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2024
2025
2026
2027 /*
2028 * initialize a buf structure for doing an I/O in the kernel.
2029 */
2030 static void
2031 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2032 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
2033 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2034 struct proc *b_proc)
2035 {
2036 /* bp->b_flags = B_PHYS | rw_flag; */
2037 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2038 bp->b_bcount = numSect << logBytesPerSector;
2039 bp->b_bufsize = bp->b_bcount;
2040 bp->b_error = 0;
2041 bp->b_dev = dev;
2042 bp->b_data = bf;
2043 bp->b_blkno = startSect;
2044 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2045 if (bp->b_bcount == 0) {
2046 panic("bp->b_bcount is zero in InitBP!!");
2047 }
2048 bp->b_proc = b_proc;
2049 bp->b_iodone = cbFunc;
2050 bp->b_private = cbArg;
2051 bp->b_vp = b_vp;
2052 if ((bp->b_flags & B_READ) == 0) {
2053 bp->b_vp->v_numoutput++;
2054 }
2055
2056 }
2057
2058 static void
2059 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2060 struct disklabel *lp)
2061 {
2062 memset(lp, 0, sizeof(*lp));
2063
2064 /* fabricate a label... */
2065 lp->d_secperunit = raidPtr->totalSectors;
2066 lp->d_secsize = raidPtr->bytesPerSector;
2067 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2068 lp->d_ntracks = 4 * raidPtr->numCol;
2069 lp->d_ncylinders = raidPtr->totalSectors /
2070 (lp->d_nsectors * lp->d_ntracks);
2071 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2072
2073 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2074 lp->d_type = DTYPE_RAID;
2075 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2076 lp->d_rpm = 3600;
2077 lp->d_interleave = 1;
2078 lp->d_flags = 0;
2079
2080 lp->d_partitions[RAW_PART].p_offset = 0;
2081 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2082 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2083 lp->d_npartitions = RAW_PART + 1;
2084
2085 lp->d_magic = DISKMAGIC;
2086 lp->d_magic2 = DISKMAGIC;
2087 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2088
2089 }
2090 /*
2091 * Read the disklabel from the raid device. If one is not present, fake one
2092 * up.
2093 */
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 *
 * Seeds the in-core label with a fabricated default, then lets
 * readdisklabel() replace it with an on-disk label if one exists.
 * A found label is sanity-checked against the current raid size, since
 * a stale label may be returned after a geometry change.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default in case no label is on disk. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No usable on-disk label: install the default one. */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* Warn about any partition extending past the raid's end. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
2146 /*
2147 * Take care of things one might want to take care of in the event
2148 * that a disklabel isn't present.
2149 */
2150 static void
2151 raidmakedisklabel(struct raid_softc *rs)
2152 {
2153 struct disklabel *lp = rs->sc_dkdev.dk_label;
2154 db1_printf(("Making a label..\n"));
2155
2156 /*
2157 * For historical reasons, if there's no disklabel present
2158 * the raw partition must be marked FS_BSDFFS.
2159 */
2160
2161 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2162
2163 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2164
2165 lp->d_checksum = dkcksum(lp);
2166 }
2167 /*
2168 * Wait interruptibly for an exclusive lock.
2169 *
2170 * XXX
2171 * Several drivers do this; it should be abstracted and made MP-safe.
2172 * (Hmm... where have we seen this warning before :-> GO )
2173 */
2174 static int
2175 raidlock(struct raid_softc *rs)
2176 {
2177 int error;
2178
2179 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2180 rs->sc_flags |= RAIDF_WANTED;
2181 if ((error =
2182 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2183 return (error);
2184 }
2185 rs->sc_flags |= RAIDF_LOCKED;
2186 return (0);
2187 }
2188 /*
2189 * Unlock and wake up any waiters.
2190 */
2191 static void
2192 raidunlock(struct raid_softc *rs)
2193 {
2194
2195 rs->sc_flags &= ~RAIDF_LOCKED;
2196 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2197 rs->sc_flags &= ~RAIDF_WANTED;
2198 wakeup(rs);
2199 }
2200 }
2201
2202
2203 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2204 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2205
2206 int
2207 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2208 {
2209 RF_ComponentLabel_t clabel;
2210 raidread_component_label(dev, b_vp, &clabel);
2211 clabel.mod_counter = mod_counter;
2212 clabel.clean = RF_RAID_CLEAN;
2213 raidwrite_component_label(dev, b_vp, &clabel);
2214 return(0);
2215 }
2216
2217
2218 int
2219 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2220 {
2221 RF_ComponentLabel_t clabel;
2222 raidread_component_label(dev, b_vp, &clabel);
2223 clabel.mod_counter = mod_counter;
2224 clabel.clean = RF_RAID_DIRTY;
2225 raidwrite_component_label(dev, b_vp, &clabel);
2226 return(0);
2227 }
2228
2229 /* ARGSUSED */
2230 int
2231 raidread_component_label(dev_t dev, struct vnode *b_vp,
2232 RF_ComponentLabel_t *clabel)
2233 {
2234 struct buf *bp;
2235 const struct bdevsw *bdev;
2236 int error;
2237
2238 /* XXX should probably ensure that we don't try to do this if
2239 someone has changed rf_protected_sectors. */
2240
2241 if (b_vp == NULL) {
2242 /* For whatever reason, this component is not valid.
2243 Don't try to read a component label from it. */
2244 return(EINVAL);
2245 }
2246
2247 /* get a block of the appropriate size... */
2248 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2249 bp->b_dev = dev;
2250
2251 /* get our ducks in a row for the read */
2252 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2253 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2254 bp->b_flags |= B_READ;
2255 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2256
2257 bdev = bdevsw_lookup(bp->b_dev);
2258 if (bdev == NULL)
2259 return (ENXIO);
2260 (*bdev->d_strategy)(bp);
2261
2262 error = biowait(bp);
2263
2264 if (!error) {
2265 memcpy(clabel, bp->b_data,
2266 sizeof(RF_ComponentLabel_t));
2267 }
2268
2269 brelse(bp);
2270 return(error);
2271 }
2272 /* ARGSUSED */
2273 int
2274 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2275 RF_ComponentLabel_t *clabel)
2276 {
2277 struct buf *bp;
2278 const struct bdevsw *bdev;
2279 int error;
2280
2281 /* get a block of the appropriate size... */
2282 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2283 bp->b_dev = dev;
2284
2285 /* get our ducks in a row for the write */
2286 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2287 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2288 bp->b_flags |= B_WRITE;
2289 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2290
2291 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2292
2293 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2294
2295 bdev = bdevsw_lookup(bp->b_dev);
2296 if (bdev == NULL)
2297 return (ENXIO);
2298 (*bdev->d_strategy)(bp);
2299 error = biowait(bp);
2300 brelse(bp);
2301 if (error) {
2302 #if 1
2303 printf("Failed to write RAID component info!\n");
2304 #endif
2305 }
2306
2307 return(error);
2308 }
2309
/*
 * Bump the array's modification counter and mark the component label of
 * every live component (and every in-use spare) dirty with the new
 * counter.  Dead components are not touched at all; spared components
 * are read but deliberately left alone.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				 but whatever you do, don't
				 try to access it!! */
			} else {
				/* live, non-spared component: stamp it
				   dirty with the new mod_counter */
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	/* now do the same for any spares that are actually in service */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			   scol keeps its previous value (-1 initially) --
			   confirm that cannot happen for a used spare. */

			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* rebuild the label from current array state */
			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
				      raidPtr->raid_cinfo[sparecol].ci_vp,
				      raidPtr->mod_counter);
		}
	}
}
2380
2381
/*
 * Rewrite the component labels of all optimal components and in-service
 * spares with a freshly bumped mod_counter.  If 'final' is
 * RF_FINAL_COMPONENT_UPDATE and the parity is known good, the labels are
 * additionally marked clean (the normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* read-modify-write the existing on-disk label */
			raidread_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;

			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			/* note what unit we are configured as */
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
				raidPtr->Disks[c].dev,
				raidPtr->raid_cinfo[c].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					    raidPtr->Disks[c].dev,
					    raidPtr->raid_cinfo[c].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			/* make sure status is noted */

			/* regenerate the label from current array state,
			   then override the per-component fields below */
			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
				raidPtr->Disks[sparecol].dev,
				raidPtr->raid_cinfo[sparecol].ci_vp,
				&clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
						       raidPtr->raid_cinfo[sparecol].ci_vp,
						       raidPtr->mod_counter);
				}
			}
		}
	}
}
2477
2478 void
2479 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2480 {
2481 struct proc *p;
2482 struct lwp *l;
2483
2484 p = raidPtr->engine_thread;
2485 l = LIST_FIRST(&p->p_lwps);
2486
2487 if (vp != NULL) {
2488 if (auto_configured == 1) {
2489 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2490 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2491 vput(vp);
2492
2493 } else {
2494 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2495 }
2496 }
2497 }
2498
2499
2500 void
2501 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2502 {
2503 int r,c;
2504 struct vnode *vp;
2505 int acd;
2506
2507
2508 /* We take this opportunity to close the vnodes like we should.. */
2509
2510 for (c = 0; c < raidPtr->numCol; c++) {
2511 vp = raidPtr->raid_cinfo[c].ci_vp;
2512 acd = raidPtr->Disks[c].auto_configured;
2513 rf_close_component(raidPtr, vp, acd);
2514 raidPtr->raid_cinfo[c].ci_vp = NULL;
2515 raidPtr->Disks[c].auto_configured = 0;
2516 }
2517
2518 for (r = 0; r < raidPtr->numSpare; r++) {
2519 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2520 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2521 rf_close_component(raidPtr, vp, acd);
2522 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2523 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2524 }
2525 }
2526
2527
2528 void
2529 rf_ReconThread(struct rf_recon_req *req)
2530 {
2531 int s;
2532 RF_Raid_t *raidPtr;
2533
2534 s = splbio();
2535 raidPtr = (RF_Raid_t *) req->raidPtr;
2536 raidPtr->recon_in_progress = 1;
2537
2538 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2539 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2540
2541 RF_Free(req, sizeof(*req));
2542
2543 raidPtr->recon_in_progress = 0;
2544 splx(s);
2545
2546 /* That's all... */
2547 kthread_exit(0); /* does not return */
2548 }
2549
2550 void
2551 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2552 {
2553 int retcode;
2554 int s;
2555
2556 raidPtr->parity_rewrite_stripes_done = 0;
2557 raidPtr->parity_rewrite_in_progress = 1;
2558 s = splbio();
2559 retcode = rf_RewriteParity(raidPtr);
2560 splx(s);
2561 if (retcode) {
2562 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2563 } else {
2564 /* set the clean bit! If we shutdown correctly,
2565 the clean bit on each component label will get
2566 set */
2567 raidPtr->parity_good = RF_RAID_CLEAN;
2568 }
2569 raidPtr->parity_rewrite_in_progress = 0;
2570
2571 /* Anyone waiting for us to stop? If so, inform them... */
2572 if (raidPtr->waitShutdown) {
2573 wakeup(&raidPtr->parity_rewrite_in_progress);
2574 }
2575
2576 /* That's all... */
2577 kthread_exit(0); /* does not return */
2578 }
2579
2580
2581 void
2582 rf_CopybackThread(RF_Raid_t *raidPtr)
2583 {
2584 int s;
2585
2586 raidPtr->copyback_in_progress = 1;
2587 s = splbio();
2588 rf_CopybackReconstructedData(raidPtr);
2589 splx(s);
2590 raidPtr->copyback_in_progress = 0;
2591
2592 /* That's all... */
2593 kthread_exit(0); /* does not return */
2594 }
2595
2596
2597 void
2598 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2599 {
2600 int s;
2601 RF_Raid_t *raidPtr;
2602
2603 s = splbio();
2604 raidPtr = req->raidPtr;
2605 raidPtr->recon_in_progress = 1;
2606 rf_ReconstructInPlace(raidPtr, req->col);
2607 RF_Free(req, sizeof(*req));
2608 raidPtr->recon_in_progress = 0;
2609 splx(s);
2610
2611 /* That's all... */
2612 kthread_exit(0); /* does not return */
2613 }
2614
/*
 * Try to read a RAIDframe component label from (dev, vp).  If the label
 * is present and reasonable (and claims no more sectors than 'size'),
 * prepend a new RF_AutoConfig_t to ac_list and keep vp open; otherwise
 * close/release vp.  On allocation failure the entire ac_list is freed
 * and NULL is returned.  Returns the (new head of the) list.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: tear down everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	/* raidread_component_label() returns 0 on success */
	if (!raidread_component_label(dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
#if DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;	/* keep the component open */
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label, so release the vnode too */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);
	}
	return ac_list;
}
2671
/*
 * Walk every disk-class device in the system looking for RAIDframe
 * components: for wedges ("dk" devices) the wedge itself is checked; for
 * ordinary disks each partition marked FS_RAID is checked.  Every
 * candidate is opened and handed to rf_get_component(), which reads and
 * validates the component label.  Returns the accumulated list of
 * auto-config records (possibly NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor, bminor, wedge;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		/* wedges use their own minor directly; plain disks use
		   the raw partition of their unit */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		if (wedge) {
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED, 0);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", dv->dv_xname, error);
				/* NOTE(review): closes with FREAD|FWRITE
				   though opened FREAD only -- confirm */
out:
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
				continue;
			}

			/* only wedges explicitly typed as RAIDframe count */
			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
				goto out;

			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    dv->dv_xname, dkw.dkw_size);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", dv->dv_xname, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		if (error)
			continue;

		/* re-open each FS_RAID partition as a candidate */
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    dv->dv_xname, 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size);
		}
	}
	return ac_list;
}
2806
2807
2808 static int
2809 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2810 {
2811
2812 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2813 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2814 ((clabel->clean == RF_RAID_CLEAN) ||
2815 (clabel->clean == RF_RAID_DIRTY)) &&
2816 clabel->row >=0 &&
2817 clabel->column >= 0 &&
2818 clabel->num_rows > 0 &&
2819 clabel->num_columns > 0 &&
2820 clabel->row < clabel->num_rows &&
2821 clabel->column < clabel->num_columns &&
2822 clabel->blockSize > 0 &&
2823 clabel->numBlocks > 0) {
2824 /* label looks reasonable enough... */
2825 return(1);
2826 }
2827 return(0);
2828 }
2829
2830
2831 #if DEBUG
/* Pretty-print the contents of a component label (DEBUG only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	       (char) clabel->parityConfig, clabel->blockSize,
	       clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
2857 #endif
2858
2859 RF_ConfigSet_t *
2860 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2861 {
2862 RF_AutoConfig_t *ac;
2863 RF_ConfigSet_t *config_sets;
2864 RF_ConfigSet_t *cset;
2865 RF_AutoConfig_t *ac_next;
2866
2867
2868 config_sets = NULL;
2869
2870 /* Go through the AutoConfig list, and figure out which components
2871 belong to what sets. */
2872 ac = ac_list;
2873 while(ac!=NULL) {
2874 /* we're going to putz with ac->next, so save it here
2875 for use at the end of the loop */
2876 ac_next = ac->next;
2877
2878 if (config_sets == NULL) {
2879 /* will need at least this one... */
2880 config_sets = (RF_ConfigSet_t *)
2881 malloc(sizeof(RF_ConfigSet_t),
2882 M_RAIDFRAME, M_NOWAIT);
2883 if (config_sets == NULL) {
2884 panic("rf_create_auto_sets: No memory!");
2885 }
2886 /* this one is easy :) */
2887 config_sets->ac = ac;
2888 config_sets->next = NULL;
2889 config_sets->rootable = 0;
2890 ac->next = NULL;
2891 } else {
2892 /* which set does this component fit into? */
2893 cset = config_sets;
2894 while(cset!=NULL) {
2895 if (rf_does_it_fit(cset, ac)) {
2896 /* looks like it matches... */
2897 ac->next = cset->ac;
2898 cset->ac = ac;
2899 break;
2900 }
2901 cset = cset->next;
2902 }
2903 if (cset==NULL) {
2904 /* didn't find a match above... new set..*/
2905 cset = (RF_ConfigSet_t *)
2906 malloc(sizeof(RF_ConfigSet_t),
2907 M_RAIDFRAME, M_NOWAIT);
2908 if (cset == NULL) {
2909 panic("rf_create_auto_sets: No memory!");
2910 }
2911 cset->ac = ac;
2912 ac->next = NULL;
2913 cset->next = config_sets;
2914 cset->rootable = 0;
2915 config_sets = cset;
2916 }
2917 }
2918 ac = ac_next;
2919 }
2920
2921
2922 return(config_sets);
2923 }
2924
2925 static int
2926 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2927 {
2928 RF_ComponentLabel_t *clabel1, *clabel2;
2929
2930 /* If this one matches the *first* one in the set, that's good
2931 enough, since the other members of the set would have been
2932 through here too... */
2933 /* note that we are not checking partitionSize here..
2934
2935 Note that we are also not checking the mod_counters here.
2936 If everything else matches execpt the mod_counter, that's
2937 good enough for this test. We will deal with the mod_counters
2938 a little later in the autoconfiguration process.
2939
2940 (clabel1->mod_counter == clabel2->mod_counter) &&
2941
2942 The reason we don't check for this is that failed disks
2943 will have lower modification counts. If those disks are
2944 not added to the set they used to belong to, then they will
2945 form their own set, which may result in 2 different sets,
2946 for example, competing to be configured at raid0, and
2947 perhaps competing to be the root filesystem set. If the
2948 wrong ones get configured, or both attempt to become /,
2949 weird behaviour and or serious lossage will occur. Thus we
2950 need to bring them into the fold here, and kick them out at
2951 a later point.
2952
2953 */
2954
2955 clabel1 = cset->ac->clabel;
2956 clabel2 = ac->clabel;
2957 if ((clabel1->version == clabel2->version) &&
2958 (clabel1->serial_number == clabel2->serial_number) &&
2959 (clabel1->num_rows == clabel2->num_rows) &&
2960 (clabel1->num_columns == clabel2->num_columns) &&
2961 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2962 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2963 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2964 (clabel1->parityConfig == clabel2->parityConfig) &&
2965 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2966 (clabel1->blockSize == clabel2->blockSize) &&
2967 (clabel1->numBlocks == clabel2->numBlocks) &&
2968 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2969 (clabel1->root_partition == clabel2->root_partition) &&
2970 (clabel1->last_unit == clabel2->last_unit) &&
2971 (clabel1->config_order == clabel2->config_order)) {
2972 /* if it get's here, it almost *has* to be a match */
2973 } else {
2974 /* it's not consistent with somebody in the set..
2975 punt */
2976 return(0);
2977 }
2978 /* all was fine.. it must fit... */
2979 return(1);
2980 }
2981
/*
 * Decide whether the config set has enough live components to be worth
 * configuring.  A component counts as "live" if its mod_counter matches
 * the highest mod_counter in the set.  RAID 1 is special-cased: columns
 * are treated as even/odd mirror pairs, and only the loss of *both*
 * halves of a pair is fatal.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the "true" counter is the maximum over all members; stale
	   (failed) components have lower values */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* is there a current (mod_counter matches) component
		   claiming column c? */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* tolerated failures: none for RAID 0, one for RAID 4/5 */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3084
3085 void
3086 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3087 RF_Raid_t *raidPtr)
3088 {
3089 RF_ComponentLabel_t *clabel;
3090 int i;
3091
3092 clabel = ac->clabel;
3093
3094 /* 1. Fill in the common stuff */
3095 config->numRow = clabel->num_rows = 1;
3096 config->numCol = clabel->num_columns;
3097 config->numSpare = 0; /* XXX should this be set here? */
3098 config->sectPerSU = clabel->sectPerSU;
3099 config->SUsPerPU = clabel->SUsPerPU;
3100 config->SUsPerRU = clabel->SUsPerRU;
3101 config->parityConfig = clabel->parityConfig;
3102 /* XXX... */
3103 strcpy(config->diskQueueType,"fifo");
3104 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3105 config->layoutSpecificSize = 0; /* XXX ?? */
3106
3107 while(ac!=NULL) {
3108 /* row/col values will be in range due to the checks
3109 in reasonable_label() */
3110 strcpy(config->devnames[0][ac->clabel->column],
3111 ac->devname);
3112 ac = ac->next;
3113 }
3114
3115 for(i=0;i<RF_MAXDBGV;i++) {
3116 config->debugVars[i][0] = 0;
3117 }
3118 }
3119
3120 int
3121 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3122 {
3123 RF_ComponentLabel_t clabel;
3124 struct vnode *vp;
3125 dev_t dev;
3126 int column;
3127 int sparecol;
3128
3129 raidPtr->autoconfigure = new_value;
3130
3131 for(column=0; column<raidPtr->numCol; column++) {
3132 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3133 dev = raidPtr->Disks[column].dev;
3134 vp = raidPtr->raid_cinfo[column].ci_vp;
3135 raidread_component_label(dev, vp, &clabel);
3136 clabel.autoconfigure = new_value;
3137 raidwrite_component_label(dev, vp, &clabel);
3138 }
3139 }
3140 for(column = 0; column < raidPtr->numSpare ; column++) {
3141 sparecol = raidPtr->numCol + column;
3142 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3143 dev = raidPtr->Disks[sparecol].dev;
3144 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3145 raidread_component_label(dev, vp, &clabel);
3146 clabel.autoconfigure = new_value;
3147 raidwrite_component_label(dev, vp, &clabel);
3148 }
3149 }
3150 return(new_value);
3151 }
3152
3153 int
3154 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3155 {
3156 RF_ComponentLabel_t clabel;
3157 struct vnode *vp;
3158 dev_t dev;
3159 int column;
3160 int sparecol;
3161
3162 raidPtr->root_partition = new_value;
3163 for(column=0; column<raidPtr->numCol; column++) {
3164 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3165 dev = raidPtr->Disks[column].dev;
3166 vp = raidPtr->raid_cinfo[column].ci_vp;
3167 raidread_component_label(dev, vp, &clabel);
3168 clabel.root_partition = new_value;
3169 raidwrite_component_label(dev, vp, &clabel);
3170 }
3171 }
3172 for(column = 0; column < raidPtr->numSpare ; column++) {
3173 sparecol = raidPtr->numCol + column;
3174 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3175 dev = raidPtr->Disks[sparecol].dev;
3176 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3177 raidread_component_label(dev, vp, &clabel);
3178 clabel.root_partition = new_value;
3179 raidwrite_component_label(dev, vp, &clabel);
3180 }
3181 }
3182 return(new_value);
3183 }
3184
3185 void
3186 rf_release_all_vps(RF_ConfigSet_t *cset)
3187 {
3188 RF_AutoConfig_t *ac;
3189
3190 ac = cset->ac;
3191 while(ac!=NULL) {
3192 /* Close the vp, and give it back */
3193 if (ac->vp) {
3194 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3195 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3196 vput(ac->vp);
3197 ac->vp = NULL;
3198 }
3199 ac = ac->next;
3200 }
3201 }
3202
3203
3204 void
3205 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3206 {
3207 RF_AutoConfig_t *ac;
3208 RF_AutoConfig_t *next_ac;
3209
3210 ac = cset->ac;
3211 while(ac!=NULL) {
3212 next_ac = ac->next;
3213 /* nuke the label */
3214 free(ac->clabel, M_RAIDFRAME);
3215 /* cleanup the config structure */
3216 free(ac, M_RAIDFRAME);
3217 /* "next.." */
3218 ac = next_ac;
3219 }
3220 /* and, finally, nuke the config set */
3221 free(cset, M_RAIDFRAME);
3222 }
3223
3224
3225 void
3226 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3227 {
3228 /* current version number */
3229 clabel->version = RF_COMPONENT_LABEL_VERSION;
3230 clabel->serial_number = raidPtr->serial_number;
3231 clabel->mod_counter = raidPtr->mod_counter;
3232 clabel->num_rows = 1;
3233 clabel->num_columns = raidPtr->numCol;
3234 clabel->clean = RF_RAID_DIRTY; /* not clean */
3235 clabel->status = rf_ds_optimal; /* "It's good!" */
3236
3237 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3238 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3239 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3240
3241 clabel->blockSize = raidPtr->bytesPerSector;
3242 clabel->numBlocks = raidPtr->sectorsPerDisk;
3243
3244 /* XXX not portable */
3245 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3246 clabel->maxOutstanding = raidPtr->maxOutstanding;
3247 clabel->autoconfigure = raidPtr->autoconfigure;
3248 clabel->root_partition = raidPtr->root_partition;
3249 clabel->last_unit = raidPtr->raidid;
3250 clabel->config_order = raidPtr->config_order;
3251 }
3252
3253 int
3254 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3255 {
3256 RF_Raid_t *raidPtr;
3257 RF_Config_t *config;
3258 int raidID;
3259 int retcode;
3260
3261 #if DEBUG
3262 printf("RAID autoconfigure\n");
3263 #endif
3264
3265 retcode = 0;
3266 *unit = -1;
3267
3268 /* 1. Create a config structure */
3269
3270 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3271 M_RAIDFRAME,
3272 M_NOWAIT);
3273 if (config==NULL) {
3274 printf("Out of mem!?!?\n");
3275 /* XXX do something more intelligent here. */
3276 return(1);
3277 }
3278
3279 memset(config, 0, sizeof(RF_Config_t));
3280
3281 /*
3282 2. Figure out what RAID ID this one is supposed to live at
3283 See if we can get the same RAID dev that it was configured
3284 on last time..
3285 */
3286
3287 raidID = cset->ac->clabel->last_unit;
3288 if ((raidID < 0) || (raidID >= numraid)) {
3289 /* let's not wander off into lala land. */
3290 raidID = numraid - 1;
3291 }
3292 if (raidPtrs[raidID]->valid != 0) {
3293
3294 /*
3295 Nope... Go looking for an alternative...
3296 Start high so we don't immediately use raid0 if that's
3297 not taken.
3298 */
3299
3300 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3301 if (raidPtrs[raidID]->valid == 0) {
3302 /* can use this one! */
3303 break;
3304 }
3305 }
3306 }
3307
3308 if (raidID < 0) {
3309 /* punt... */
3310 printf("Unable to auto configure this set!\n");
3311 printf("(Out of RAID devs!)\n");
3312 free(config, M_RAIDFRAME);
3313 return(1);
3314 }
3315
3316 #if DEBUG
3317 printf("Configuring raid%d:\n",raidID);
3318 #endif
3319
3320 raidPtr = raidPtrs[raidID];
3321
3322 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3323 raidPtr->raidid = raidID;
3324 raidPtr->openings = RAIDOUTSTANDING;
3325
3326 /* 3. Build the configuration structure */
3327 rf_create_configuration(cset->ac, config, raidPtr);
3328
3329 /* 4. Do the configuration */
3330 retcode = rf_Configure(raidPtr, config, cset->ac);
3331
3332 if (retcode == 0) {
3333
3334 raidinit(raidPtrs[raidID], 1);
3335
3336 rf_markalldirty(raidPtrs[raidID]);
3337 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3338 if (cset->ac->clabel->root_partition==1) {
3339 /* everything configured just fine. Make a note
3340 that this set is eligible to be root. */
3341 cset->rootable = 1;
3342 /* XXX do this here? */
3343 raidPtrs[raidID]->root_partition = 1;
3344 }
3345 }
3346
3347 /* 5. Cleanup */
3348 free(config, M_RAIDFRAME);
3349
3350 *unit = raidID;
3351 return(retcode);
3352 }
3353
3354 void
3355 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3356 {
3357 struct buf *bp;
3358
3359 bp = (struct buf *)desc->bp;
3360 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3361 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3362 }
3363
/*
 * Initialize a pool(9) for RAIDframe use and set its watermarks:
 * pre-allocate xmin items, keep at least xmin cached, and cap the
 * cache at xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);
	pool_sethiwat(p, xmax);		/* upper bound on cached items */
	pool_prime(p, xmin);		/* pre-allocate the minimum now */
	pool_setlowat(p, xmin);		/* replenish below this point */
}
3373
3374 /*
3375 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3376 * if there is IO pending and if that IO could possibly be done for a
3377 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3378 * otherwise.
3379 *
3380 */
3381
3382 int
3383 rf_buf_queue_check(int raidid)
3384 {
3385 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3386 raidPtrs[raidid]->openings > 0) {
3387 /* there is work to do */
3388 return 0;
3389 }
3390 /* default is nothing to do */
3391 return 1;
3392 }
3393
/*
 * Determine the size and sector size of the component open on vp.
 * Try the disklabel partition ioctl first; if that fails (e.g. the
 * component is a dk(4) wedge) fall back to DIOCGWEDGEINFO.  The
 * usable block count excludes the sectors reserved for the
 * component label.  Returns 0 on success, else the ioctl error.
 */
int
rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
{
	struct partinfo dpart;
	struct dkwedge_info dkw;
	int error;

	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
	if (error == 0) {
		diskPtr->blockSize = dpart.disklab->d_secsize;
		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
		diskPtr->partitionSize = dpart.part->p_size;
		return 0;
	}

	/* Not a labeled partition; maybe it's a wedge. */
	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
	if (error == 0) {
		diskPtr->blockSize = 512;	/* XXX wedge info carries no sector size */
		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
		diskPtr->partitionSize = dkw.dkw_size;
		return 0;
	}
	return error;
}
3418
/*
 * Autoconf match: raid(4) is a pseudo-device, so every probe matches.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{
	return (1);
}
3424
3425 static void
3426 raid_attach(struct device *parent, struct device *self, void *aux)
3427 {
3428 struct raid_softc *rs = (struct raid_softc *)self;
3429
3430 pseudo_disk_init(&rs->sc_dkdev);
3431 }
3432
3433
3434 static int
3435 raid_detach(struct device *self, int flags)
3436 {
3437 struct raid_softc *rs = (struct raid_softc *)self;
3438
3439 if (rs->sc_flags & RAIDF_INITED)
3440 return EBUSY;
3441
3442 return 0;
3443 }
3444
3445
3446