rf_netbsdkintf.c revision 1.219 1 /* $NetBSD: rf_netbsdkintf.c,v 1.219 2006/10/08 23:22:26 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.219 2006/10/08 23:22:26 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesised printf argument list.  With
 * DEBUG defined the message is printed only when rf_kdebug_level > 0;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement and can be used safely as the body of an
 * unbraced if/else.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define	db1_printf(a)	do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define	db1_printf(a)	do { } while (0)
#endif				/* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217 static int raid_match(struct device *, struct cfdata *, void *);
218 static void raid_attach(struct device *, struct device *, void *);
219 static int raid_detach(struct device *, int);
220
221 dev_type_open(raidopen);
222 dev_type_close(raidclose);
223 dev_type_read(raidread);
224 dev_type_write(raidwrite);
225 dev_type_ioctl(raidioctl);
226 dev_type_strategy(raidstrategy);
227 dev_type_dump(raiddump);
228 dev_type_size(raidsize);
229
230 const struct bdevsw raid_bdevsw = {
231 raidopen, raidclose, raidstrategy, raidioctl,
232 raiddump, raidsize, D_DISK
233 };
234
235 const struct cdevsw raid_cdevsw = {
236 raidopen, raidclose, raidread, raidwrite, raidioctl,
237 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
238 };
239
240 /* XXX Not sure if the following should be replacing the raidPtrs above,
241 or if it should be used in conjunction with that...
242 */
243
/*
 * Per-unit driver state.  One entry per RAID unit lives in the raid_softc[]
 * array that raidattach() allocates and zeroes; entries are indexed by the
 * unit number obtained from raidunit(dev).
 */
struct raid_softc {
	struct device *sc_dev;	/* autoconf device handle; passed to
				 * config_detach() and used as booted_device */
	int     sc_flags;	/* flags (RAIDF_* bits below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info (open masks,
				 * disklabel, wedge count) */
	struct bufq_state *buf_queue;	/* used for the device queue;
					 * raidstrategy() puts bufs here */
};
253 /* sc_flags */
254 #define RAIDF_INITED 0x01 /* unit has been initialized */
255 #define RAIDF_WLABEL 0x02 /* label area is writable */
256 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
257 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
258 #define RAIDF_LOCKED 0x80 /* unit is locked */
259
260 #define raidunit(x) DISKUNIT(x)
261 int numraid = 0;
262
263 extern struct cfdriver raid_cd;
264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
265 raid_match, raid_attach, raid_detach, NULL);
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
295 struct disklabel *);
296 static void raidgetdisklabel(dev_t);
297 static void raidmakedisklabel(struct raid_softc *);
298
299 static int raidlock(struct raid_softc *);
300 static void raidunlock(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(struct device *self);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 static int rf_reasonable_label(RF_ComponentLabel_t *);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
322
323 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
324 allow autoconfig to take place.
325 Note that this is overridden by having
326 RAID_AUTOCONFIG as an option in the
327 kernel config file. */
328
329 struct RF_Pools_s rf_pools;
330
331 void
332 raidattach(int num)
333 {
334 int raidID;
335 int i, rc;
336
337 #ifdef DEBUG
338 printf("raidattach: Asked for %d units\n", num);
339 #endif
340
341 if (num <= 0) {
342 #ifdef DIAGNOSTIC
343 panic("raidattach: count <= 0");
344 #endif
345 return;
346 }
347 /* This is where all the initialization stuff gets done. */
348
349 numraid = num;
350
351 /* Make some space for requested number of units... */
352
353 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
354 if (raidPtrs == NULL) {
355 panic("raidPtrs is NULL!!");
356 }
357
358 rf_mutex_init(&rf_sparet_wait_mutex);
359
360 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
361
362 for (i = 0; i < num; i++)
363 raidPtrs[i] = NULL;
364 rc = rf_BootRaidframe();
365 if (rc == 0)
366 printf("Kernelized RAIDframe activated\n");
367 else
368 panic("Serious error booting RAID!!");
369
370 /* put together some datastructures like the CCD device does.. This
371 * lets us lock the device and what-not when it gets opened. */
372
373 raid_softc = (struct raid_softc *)
374 malloc(num * sizeof(struct raid_softc),
375 M_RAIDFRAME, M_NOWAIT);
376 if (raid_softc == NULL) {
377 printf("WARNING: no memory for RAIDframe driver\n");
378 return;
379 }
380
381 memset(raid_softc, 0, num * sizeof(struct raid_softc));
382
383 for (raidID = 0; raidID < num; raidID++) {
384 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
385
386 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
387 (RF_Raid_t *));
388 if (raidPtrs[raidID] == NULL) {
389 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
390 numraid = raidID;
391 return;
392 }
393 }
394
395 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
396 printf("config_cfattach_attach failed?\n");
397 }
398
399 #ifdef RAID_AUTOCONFIG
400 raidautoconfig = 1;
401 #endif
402
403 /*
404 * Register a finalizer which will be used to auto-config RAID
405 * sets once all real hardware devices have been found.
406 */
407 if (config_finalize_register(NULL, rf_autoconfig) != 0)
408 printf("WARNING: unable to register RAIDframe finalizer\n");
409 }
410
411 int
412 rf_autoconfig(struct device *self)
413 {
414 RF_AutoConfig_t *ac_list;
415 RF_ConfigSet_t *config_sets;
416 int i;
417
418 if (raidautoconfig == 0)
419 return (0);
420
421 /* XXX This code can only be run once. */
422 raidautoconfig = 0;
423
424 /* 1. locate all RAID components on the system */
425 #ifdef DEBUG
426 printf("Searching for RAID components...\n");
427 #endif
428 ac_list = rf_find_raid_components();
429
430 /* 2. Sort them into their respective sets. */
431 config_sets = rf_create_auto_sets(ac_list);
432
433 /*
434 * 3. Evaluate each set andconfigure the valid ones.
435 * This gets done in rf_buildroothack().
436 */
437 rf_buildroothack(config_sets);
438
439 for (i = 0; i < numraid; i++)
440 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
441 dkwedge_discover(&raid_softc[i].sc_dkdev);
442
443 return 1;
444 }
445
446 void
447 rf_buildroothack(RF_ConfigSet_t *config_sets)
448 {
449 RF_ConfigSet_t *cset;
450 RF_ConfigSet_t *next_cset;
451 int retcode;
452 int raidID;
453 int rootID;
454 int num_root;
455
456 rootID = 0;
457 num_root = 0;
458 cset = config_sets;
459 while(cset != NULL ) {
460 next_cset = cset->next;
461 if (rf_have_enough_components(cset) &&
462 cset->ac->clabel->autoconfigure==1) {
463 retcode = rf_auto_config_set(cset,&raidID);
464 if (!retcode) {
465 #ifdef DEBUG
466 printf("raid%d: configured ok\n", raidID);
467 #endif
468 if (cset->rootable) {
469 rootID = raidID;
470 num_root++;
471 }
472 } else {
473 /* The autoconfig didn't work :( */
474 #if DEBUG
475 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
476 #endif
477 rf_release_all_vps(cset);
478 }
479 } else {
480 #ifdef DEBUG
481 printf("raid%d: not enough components\n", raidID);
482 #endif
483 /* we're not autoconfiguring this set...
484 release the associated resources */
485 rf_release_all_vps(cset);
486 }
487 /* cleanup */
488 rf_cleanup_config_set(cset);
489 cset = next_cset;
490 }
491
492 /* we found something bootable... */
493
494 if (num_root == 1) {
495 booted_device = raid_softc[rootID].sc_dev;
496 } else if (num_root > 1) {
497 /* we can't guess.. require the user to answer... */
498 boothowto |= RB_ASKNAME;
499 }
500 }
501
502
503 int
504 raidsize(dev_t dev)
505 {
506 struct raid_softc *rs;
507 struct disklabel *lp;
508 int part, unit, omask, size;
509
510 unit = raidunit(dev);
511 if (unit >= numraid)
512 return (-1);
513 rs = &raid_softc[unit];
514
515 if ((rs->sc_flags & RAIDF_INITED) == 0)
516 return (-1);
517
518 part = DISKPART(dev);
519 omask = rs->sc_dkdev.dk_openmask & (1 << part);
520 lp = rs->sc_dkdev.dk_label;
521
522 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
523 return (-1);
524
525 if (lp->d_partitions[part].p_fstype != FS_SWAP)
526 size = -1;
527 else
528 size = lp->d_partitions[part].p_size *
529 (lp->d_secsize / DEV_BSIZE);
530
531 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
532 return (-1);
533
534 return (size);
535
536 }
537
538 int
539 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
540 {
541 /* Not implemented. */
542 return ENXIO;
543 }
544 /* ARGSUSED */
545 int
546 raidopen(dev_t dev, int flags, int fmt, struct lwp *l)
547 {
548 int unit = raidunit(dev);
549 struct raid_softc *rs;
550 struct disklabel *lp;
551 int part, pmask;
552 int error = 0;
553
554 if (unit >= numraid)
555 return (ENXIO);
556 rs = &raid_softc[unit];
557
558 if ((error = raidlock(rs)) != 0)
559 return (error);
560 lp = rs->sc_dkdev.dk_label;
561
562 part = DISKPART(dev);
563
564 /*
565 * If there are wedges, and this is not RAW_PART, then we
566 * need to fail.
567 */
568 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
569 error = EBUSY;
570 goto bad;
571 }
572 pmask = (1 << part);
573
574 if ((rs->sc_flags & RAIDF_INITED) &&
575 (rs->sc_dkdev.dk_openmask == 0))
576 raidgetdisklabel(dev);
577
578 /* make sure that this partition exists */
579
580 if (part != RAW_PART) {
581 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
582 ((part >= lp->d_npartitions) ||
583 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
584 error = ENXIO;
585 goto bad;
586 }
587 }
588 /* Prevent this unit from being unconfigured while open. */
589 switch (fmt) {
590 case S_IFCHR:
591 rs->sc_dkdev.dk_copenmask |= pmask;
592 break;
593
594 case S_IFBLK:
595 rs->sc_dkdev.dk_bopenmask |= pmask;
596 break;
597 }
598
599 if ((rs->sc_dkdev.dk_openmask == 0) &&
600 ((rs->sc_flags & RAIDF_INITED) != 0)) {
601 /* First one... mark things as dirty... Note that we *MUST*
602 have done a configure before this. I DO NOT WANT TO BE
603 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
604 THAT THEY BELONG TOGETHER!!!!! */
605 /* XXX should check to see if we're only open for reading
606 here... If so, we needn't do this, but then need some
607 other way of keeping track of what's happened.. */
608
609 rf_markalldirty( raidPtrs[unit] );
610 }
611
612
613 rs->sc_dkdev.dk_openmask =
614 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
615
616 bad:
617 raidunlock(rs);
618
619 return (error);
620
621
622 }
623 /* ARGSUSED */
624 int
625 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
626 {
627 int unit = raidunit(dev);
628 struct cfdata *cf;
629 struct raid_softc *rs;
630 int error = 0;
631 int part;
632
633 if (unit >= numraid)
634 return (ENXIO);
635 rs = &raid_softc[unit];
636
637 if ((error = raidlock(rs)) != 0)
638 return (error);
639
640 part = DISKPART(dev);
641
642 /* ...that much closer to allowing unconfiguration... */
643 switch (fmt) {
644 case S_IFCHR:
645 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
646 break;
647
648 case S_IFBLK:
649 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
650 break;
651 }
652 rs->sc_dkdev.dk_openmask =
653 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
654
655 if ((rs->sc_dkdev.dk_openmask == 0) &&
656 ((rs->sc_flags & RAIDF_INITED) != 0)) {
657 /* Last one... device is not unconfigured yet.
658 Device shutdown has taken care of setting the
659 clean bits if RAIDF_INITED is not set
660 mark things as clean... */
661
662 rf_update_component_labels(raidPtrs[unit],
663 RF_FINAL_COMPONENT_UPDATE);
664 if (doing_shutdown) {
665 /* last one, and we're going down, so
666 lights out for this RAID set too. */
667 error = rf_Shutdown(raidPtrs[unit]);
668
669 /* It's no longer initialized... */
670 rs->sc_flags &= ~RAIDF_INITED;
671
672 /* detach the device */
673
674 cf = device_cfdata(rs->sc_dev);
675 error = config_detach(rs->sc_dev, DETACH_QUIET);
676 free(cf, M_RAIDFRAME);
677
678 /* Detach the disk. */
679 pseudo_disk_detach(&rs->sc_dkdev);
680 }
681 }
682
683 raidunlock(rs);
684 return (0);
685
686 }
687
688 void
689 raidstrategy(struct buf *bp)
690 {
691 int s;
692
693 unsigned int raidID = raidunit(bp->b_dev);
694 RF_Raid_t *raidPtr;
695 struct raid_softc *rs = &raid_softc[raidID];
696 int wlabel;
697
698 if ((rs->sc_flags & RAIDF_INITED) ==0) {
699 bp->b_error = ENXIO;
700 bp->b_flags |= B_ERROR;
701 goto done;
702 }
703 if (raidID >= numraid || !raidPtrs[raidID]) {
704 bp->b_error = ENODEV;
705 bp->b_flags |= B_ERROR;
706 goto done;
707 }
708 raidPtr = raidPtrs[raidID];
709 if (!raidPtr->valid) {
710 bp->b_error = ENODEV;
711 bp->b_flags |= B_ERROR;
712 goto done;
713 }
714 if (bp->b_bcount == 0) {
715 db1_printf(("b_bcount is zero..\n"));
716 goto done;
717 }
718
719 /*
720 * Do bounds checking and adjust transfer. If there's an
721 * error, the bounds check will flag that for us.
722 */
723
724 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
725 if (DISKPART(bp->b_dev) == RAW_PART) {
726 uint64_t size; /* device size in DEV_BSIZE unit */
727
728 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
729 size = raidPtr->totalSectors <<
730 (raidPtr->logBytesPerSector - DEV_BSHIFT);
731 } else {
732 size = raidPtr->totalSectors >>
733 (DEV_BSHIFT - raidPtr->logBytesPerSector);
734 }
735 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
736 goto done;
737 }
738 } else {
739 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
740 db1_printf(("Bounds check failed!!:%d %d\n",
741 (int) bp->b_blkno, (int) wlabel));
742 goto done;
743 }
744 }
745 s = splbio();
746
747 bp->b_resid = 0;
748
749 /* stuff it onto our queue */
750 BUFQ_PUT(rs->buf_queue, bp);
751
752 /* scheduled the IO to happen at the next convenient time */
753 wakeup(&(raidPtrs[raidID]->iodone));
754
755 splx(s);
756 return;
757
758 done:
759 bp->b_resid = bp->b_bcount;
760 biodone(bp);
761 }
762 /* ARGSUSED */
763 int
764 raidread(dev_t dev, struct uio *uio, int flags)
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775
776 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
777
778 }
779 /* ARGSUSED */
780 int
781 raidwrite(dev_t dev, struct uio *uio, int flags)
782 {
783 int unit = raidunit(dev);
784 struct raid_softc *rs;
785
786 if (unit >= numraid)
787 return (ENXIO);
788 rs = &raid_softc[unit];
789
790 if ((rs->sc_flags & RAIDF_INITED) == 0)
791 return (ENXIO);
792
793 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
794
795 }
796
797 int
798 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
799 {
800 int unit = raidunit(dev);
801 int error = 0;
802 int part, pmask;
803 struct cfdata *cf;
804 struct raid_softc *rs;
805 RF_Config_t *k_cfg, *u_cfg;
806 RF_Raid_t *raidPtr;
807 RF_RaidDisk_t *diskPtr;
808 RF_AccTotals_t *totals;
809 RF_DeviceConfig_t *d_cfg, **ucfgp;
810 u_char *specific_buf;
811 int retcode = 0;
812 int column;
813 int raidid;
814 struct rf_recon_req *rrcopy, *rr;
815 RF_ComponentLabel_t *clabel;
816 RF_ComponentLabel_t *ci_label;
817 RF_ComponentLabel_t **clabel_ptr;
818 RF_SingleComponent_t *sparePtr,*componentPtr;
819 RF_SingleComponent_t component;
820 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
821 int i, j, d;
822 #ifdef __HAVE_OLD_DISKLABEL
823 struct disklabel newlabel;
824 #endif
825 struct dkwedge_info *dkw;
826
827 if (unit >= numraid)
828 return (ENXIO);
829 rs = &raid_softc[unit];
830 raidPtr = raidPtrs[unit];
831
832 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
833 (int) DISKPART(dev), (int) unit, (int) cmd));
834
835 /* Must be open for writes for these commands... */
836 switch (cmd) {
837 #ifdef DIOCGSECTORSIZE
838 case DIOCGSECTORSIZE:
839 *(u_int *)data = raidPtr->bytesPerSector;
840 return 0;
841 case DIOCGMEDIASIZE:
842 *(off_t *)data =
843 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
844 return 0;
845 #endif
846 case DIOCSDINFO:
847 case DIOCWDINFO:
848 #ifdef __HAVE_OLD_DISKLABEL
849 case ODIOCWDINFO:
850 case ODIOCSDINFO:
851 #endif
852 case DIOCWLABEL:
853 case DIOCAWEDGE:
854 case DIOCDWEDGE:
855 if ((flag & FWRITE) == 0)
856 return (EBADF);
857 }
858
859 /* Must be initialized for these... */
860 switch (cmd) {
861 case DIOCGDINFO:
862 case DIOCSDINFO:
863 case DIOCWDINFO:
864 #ifdef __HAVE_OLD_DISKLABEL
865 case ODIOCGDINFO:
866 case ODIOCWDINFO:
867 case ODIOCSDINFO:
868 case ODIOCGDEFLABEL:
869 #endif
870 case DIOCGPART:
871 case DIOCWLABEL:
872 case DIOCGDEFLABEL:
873 case DIOCAWEDGE:
874 case DIOCDWEDGE:
875 case DIOCLWEDGES:
876 case RAIDFRAME_SHUTDOWN:
877 case RAIDFRAME_REWRITEPARITY:
878 case RAIDFRAME_GET_INFO:
879 case RAIDFRAME_RESET_ACCTOTALS:
880 case RAIDFRAME_GET_ACCTOTALS:
881 case RAIDFRAME_KEEP_ACCTOTALS:
882 case RAIDFRAME_GET_SIZE:
883 case RAIDFRAME_FAIL_DISK:
884 case RAIDFRAME_COPYBACK:
885 case RAIDFRAME_CHECK_RECON_STATUS:
886 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
887 case RAIDFRAME_GET_COMPONENT_LABEL:
888 case RAIDFRAME_SET_COMPONENT_LABEL:
889 case RAIDFRAME_ADD_HOT_SPARE:
890 case RAIDFRAME_REMOVE_HOT_SPARE:
891 case RAIDFRAME_INIT_LABELS:
892 case RAIDFRAME_REBUILD_IN_PLACE:
893 case RAIDFRAME_CHECK_PARITY:
894 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
895 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
896 case RAIDFRAME_CHECK_COPYBACK_STATUS:
897 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
898 case RAIDFRAME_SET_AUTOCONFIG:
899 case RAIDFRAME_SET_ROOT:
900 case RAIDFRAME_DELETE_COMPONENT:
901 case RAIDFRAME_INCORPORATE_HOT_SPARE:
902 if ((rs->sc_flags & RAIDF_INITED) == 0)
903 return (ENXIO);
904 }
905
906 switch (cmd) {
907
908 /* configure the system */
909 case RAIDFRAME_CONFIGURE:
910
911 if (raidPtr->valid) {
912 /* There is a valid RAID set running on this unit! */
913 printf("raid%d: Device already configured!\n",unit);
914 return(EINVAL);
915 }
916
917 /* copy-in the configuration information */
918 /* data points to a pointer to the configuration structure */
919
920 u_cfg = *((RF_Config_t **) data);
921 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
922 if (k_cfg == NULL) {
923 return (ENOMEM);
924 }
925 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
926 if (retcode) {
927 RF_Free(k_cfg, sizeof(RF_Config_t));
928 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
929 retcode));
930 return (retcode);
931 }
932 /* allocate a buffer for the layout-specific data, and copy it
933 * in */
934 if (k_cfg->layoutSpecificSize) {
935 if (k_cfg->layoutSpecificSize > 10000) {
936 /* sanity check */
937 RF_Free(k_cfg, sizeof(RF_Config_t));
938 return (EINVAL);
939 }
940 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
941 (u_char *));
942 if (specific_buf == NULL) {
943 RF_Free(k_cfg, sizeof(RF_Config_t));
944 return (ENOMEM);
945 }
946 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
947 k_cfg->layoutSpecificSize);
948 if (retcode) {
949 RF_Free(k_cfg, sizeof(RF_Config_t));
950 RF_Free(specific_buf,
951 k_cfg->layoutSpecificSize);
952 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
953 retcode));
954 return (retcode);
955 }
956 } else
957 specific_buf = NULL;
958 k_cfg->layoutSpecific = specific_buf;
959
960 /* should do some kind of sanity check on the configuration.
961 * Store the sum of all the bytes in the last byte? */
962
963 /* configure the system */
964
965 /*
966 * Clear the entire RAID descriptor, just to make sure
967 * there is no stale data left in the case of a
968 * reconfiguration
969 */
970 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
971 raidPtr->raidid = unit;
972
973 retcode = rf_Configure(raidPtr, k_cfg, NULL);
974
975 if (retcode == 0) {
976
977 /* allow this many simultaneous IO's to
978 this RAID device */
979 raidPtr->openings = RAIDOUTSTANDING;
980
981 raidinit(raidPtr);
982 rf_markalldirty(raidPtr);
983 }
984 /* free the buffers. No return code here. */
985 if (k_cfg->layoutSpecificSize) {
986 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
987 }
988 RF_Free(k_cfg, sizeof(RF_Config_t));
989
990 return (retcode);
991
992 /* shutdown the system */
993 case RAIDFRAME_SHUTDOWN:
994
995 if ((error = raidlock(rs)) != 0)
996 return (error);
997
998 /*
999 * If somebody has a partition mounted, we shouldn't
1000 * shutdown.
1001 */
1002
1003 part = DISKPART(dev);
1004 pmask = (1 << part);
1005 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1006 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1007 (rs->sc_dkdev.dk_copenmask & pmask))) {
1008 raidunlock(rs);
1009 return (EBUSY);
1010 }
1011
1012 retcode = rf_Shutdown(raidPtr);
1013
1014 /* It's no longer initialized... */
1015 rs->sc_flags &= ~RAIDF_INITED;
1016
1017 /* free the pseudo device attach bits */
1018
1019 cf = device_cfdata(rs->sc_dev);
1020 /* XXX this causes us to not return any errors
1021 from the above call to rf_Shutdown() */
1022 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1023 free(cf, M_RAIDFRAME);
1024
1025 /* Detach the disk. */
1026 pseudo_disk_detach(&rs->sc_dkdev);
1027
1028 raidunlock(rs);
1029
1030 return (retcode);
1031 case RAIDFRAME_GET_COMPONENT_LABEL:
1032 clabel_ptr = (RF_ComponentLabel_t **) data;
1033 /* need to read the component label for the disk indicated
1034 by row,column in clabel */
1035
1036 /* For practice, let's get it directly fromdisk, rather
1037 than from the in-core copy */
1038 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1039 (RF_ComponentLabel_t *));
1040 if (clabel == NULL)
1041 return (ENOMEM);
1042
1043 retcode = copyin( *clabel_ptr, clabel,
1044 sizeof(RF_ComponentLabel_t));
1045
1046 if (retcode) {
1047 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1048 return(retcode);
1049 }
1050
1051 clabel->row = 0; /* Don't allow looking at anything else.*/
1052
1053 column = clabel->column;
1054
1055 if ((column < 0) || (column >= raidPtr->numCol +
1056 raidPtr->numSpare)) {
1057 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1058 return(EINVAL);
1059 }
1060
1061 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1062 raidPtr->raid_cinfo[column].ci_vp,
1063 clabel );
1064
1065 if (retcode == 0) {
1066 retcode = copyout(clabel, *clabel_ptr,
1067 sizeof(RF_ComponentLabel_t));
1068 }
1069 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1070 return (retcode);
1071
1072 case RAIDFRAME_SET_COMPONENT_LABEL:
1073 clabel = (RF_ComponentLabel_t *) data;
1074
1075 /* XXX check the label for valid stuff... */
1076 /* Note that some things *should not* get modified --
1077 the user should be re-initing the labels instead of
1078 trying to patch things.
1079 */
1080
1081 raidid = raidPtr->raidid;
1082 #if DEBUG
1083 printf("raid%d: Got component label:\n", raidid);
1084 printf("raid%d: Version: %d\n", raidid, clabel->version);
1085 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1086 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1087 printf("raid%d: Column: %d\n", raidid, clabel->column);
1088 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1089 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1090 printf("raid%d: Status: %d\n", raidid, clabel->status);
1091 #endif
1092 clabel->row = 0;
1093 column = clabel->column;
1094
1095 if ((column < 0) || (column >= raidPtr->numCol)) {
1096 return(EINVAL);
1097 }
1098
1099 /* XXX this isn't allowed to do anything for now :-) */
1100
1101 /* XXX and before it is, we need to fill in the rest
1102 of the fields!?!?!?! */
1103 #if 0
1104 raidwrite_component_label(
1105 raidPtr->Disks[column].dev,
1106 raidPtr->raid_cinfo[column].ci_vp,
1107 clabel );
1108 #endif
1109 return (0);
1110
1111 case RAIDFRAME_INIT_LABELS:
1112 clabel = (RF_ComponentLabel_t *) data;
1113 /*
1114 we only want the serial number from
1115 the above. We get all the rest of the information
1116 from the config that was used to create this RAID
1117 set.
1118 */
1119
1120 raidPtr->serial_number = clabel->serial_number;
1121
1122 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1123 (RF_ComponentLabel_t *));
1124 if (ci_label == NULL)
1125 return (ENOMEM);
1126
1127 raid_init_component_label(raidPtr, ci_label);
1128 ci_label->serial_number = clabel->serial_number;
1129 ci_label->row = 0; /* we dont' pretend to support more */
1130
1131 for(column=0;column<raidPtr->numCol;column++) {
1132 diskPtr = &raidPtr->Disks[column];
1133 if (!RF_DEAD_DISK(diskPtr->status)) {
1134 ci_label->partitionSize = diskPtr->partitionSize;
1135 ci_label->column = column;
1136 raidwrite_component_label(
1137 raidPtr->Disks[column].dev,
1138 raidPtr->raid_cinfo[column].ci_vp,
1139 ci_label );
1140 }
1141 }
1142 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1143
1144 return (retcode);
1145 case RAIDFRAME_SET_AUTOCONFIG:
1146 d = rf_set_autoconfig(raidPtr, *(int *) data);
1147 printf("raid%d: New autoconfig value is: %d\n",
1148 raidPtr->raidid, d);
1149 *(int *) data = d;
1150 return (retcode);
1151
1152 case RAIDFRAME_SET_ROOT:
1153 d = rf_set_rootpartition(raidPtr, *(int *) data);
1154 printf("raid%d: New rootpartition value is: %d\n",
1155 raidPtr->raidid, d);
1156 *(int *) data = d;
1157 return (retcode);
1158
1159 /* initialize all parity */
1160 case RAIDFRAME_REWRITEPARITY:
1161
1162 if (raidPtr->Layout.map->faultsTolerated == 0) {
1163 /* Parity for RAID 0 is trivially correct */
1164 raidPtr->parity_good = RF_RAID_CLEAN;
1165 return(0);
1166 }
1167
1168 if (raidPtr->parity_rewrite_in_progress == 1) {
1169 /* Re-write is already in progress! */
1170 return(EINVAL);
1171 }
1172
1173 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1174 rf_RewriteParityThread,
1175 raidPtr,"raid_parity");
1176 return (retcode);
1177
1178
1179 case RAIDFRAME_ADD_HOT_SPARE:
1180 sparePtr = (RF_SingleComponent_t *) data;
1181 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1182 retcode = rf_add_hot_spare(raidPtr, &component);
1183 return(retcode);
1184
1185 case RAIDFRAME_REMOVE_HOT_SPARE:
1186 return(retcode);
1187
1188 case RAIDFRAME_DELETE_COMPONENT:
1189 componentPtr = (RF_SingleComponent_t *)data;
1190 memcpy( &component, componentPtr,
1191 sizeof(RF_SingleComponent_t));
1192 retcode = rf_delete_component(raidPtr, &component);
1193 return(retcode);
1194
1195 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1196 componentPtr = (RF_SingleComponent_t *)data;
1197 memcpy( &component, componentPtr,
1198 sizeof(RF_SingleComponent_t));
1199 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1200 return(retcode);
1201
1202 case RAIDFRAME_REBUILD_IN_PLACE:
1203
1204 if (raidPtr->Layout.map->faultsTolerated == 0) {
1205 /* Can't do this on a RAID 0!! */
1206 return(EINVAL);
1207 }
1208
1209 if (raidPtr->recon_in_progress == 1) {
1210 /* a reconstruct is already in progress! */
1211 return(EINVAL);
1212 }
1213
1214 componentPtr = (RF_SingleComponent_t *) data;
1215 memcpy( &component, componentPtr,
1216 sizeof(RF_SingleComponent_t));
1217 component.row = 0; /* we don't support any more */
1218 column = component.column;
1219
1220 if ((column < 0) || (column >= raidPtr->numCol)) {
1221 return(EINVAL);
1222 }
1223
1224 RF_LOCK_MUTEX(raidPtr->mutex);
1225 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1226 (raidPtr->numFailures > 0)) {
1227 /* XXX 0 above shouldn't be constant!!! */
1228 /* some component other than this has failed.
1229 Let's not make things worse than they already
1230 are... */
1231 printf("raid%d: Unable to reconstruct to disk at:\n",
1232 raidPtr->raidid);
1233 printf("raid%d: Col: %d Too many failures.\n",
1234 raidPtr->raidid, column);
1235 RF_UNLOCK_MUTEX(raidPtr->mutex);
1236 return (EINVAL);
1237 }
1238 if (raidPtr->Disks[column].status ==
1239 rf_ds_reconstructing) {
1240 printf("raid%d: Unable to reconstruct to disk at:\n",
1241 raidPtr->raidid);
1242 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1243
1244 RF_UNLOCK_MUTEX(raidPtr->mutex);
1245 return (EINVAL);
1246 }
1247 if (raidPtr->Disks[column].status == rf_ds_spared) {
1248 RF_UNLOCK_MUTEX(raidPtr->mutex);
1249 return (EINVAL);
1250 }
1251 RF_UNLOCK_MUTEX(raidPtr->mutex);
1252
1253 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1254 if (rrcopy == NULL)
1255 return(ENOMEM);
1256
1257 rrcopy->raidPtr = (void *) raidPtr;
1258 rrcopy->col = column;
1259
1260 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1261 rf_ReconstructInPlaceThread,
1262 rrcopy,"raid_reconip");
1263 return(retcode);
1264
1265 case RAIDFRAME_GET_INFO:
1266 if (!raidPtr->valid)
1267 return (ENODEV);
1268 ucfgp = (RF_DeviceConfig_t **) data;
1269 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1270 (RF_DeviceConfig_t *));
1271 if (d_cfg == NULL)
1272 return (ENOMEM);
1273 d_cfg->rows = 1; /* there is only 1 row now */
1274 d_cfg->cols = raidPtr->numCol;
1275 d_cfg->ndevs = raidPtr->numCol;
1276 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1277 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1278 return (ENOMEM);
1279 }
1280 d_cfg->nspares = raidPtr->numSpare;
1281 if (d_cfg->nspares >= RF_MAX_DISKS) {
1282 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1283 return (ENOMEM);
1284 }
1285 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1286 d = 0;
1287 for (j = 0; j < d_cfg->cols; j++) {
1288 d_cfg->devs[d] = raidPtr->Disks[j];
1289 d++;
1290 }
1291 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1292 d_cfg->spares[i] = raidPtr->Disks[j];
1293 }
1294 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1295 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1296
1297 return (retcode);
1298
1299 case RAIDFRAME_CHECK_PARITY:
1300 *(int *) data = raidPtr->parity_good;
1301 return (0);
1302
1303 case RAIDFRAME_RESET_ACCTOTALS:
1304 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1305 return (0);
1306
1307 case RAIDFRAME_GET_ACCTOTALS:
1308 totals = (RF_AccTotals_t *) data;
1309 *totals = raidPtr->acc_totals;
1310 return (0);
1311
1312 case RAIDFRAME_KEEP_ACCTOTALS:
1313 raidPtr->keep_acc_totals = *(int *)data;
1314 return (0);
1315
1316 case RAIDFRAME_GET_SIZE:
1317 *(int *) data = raidPtr->totalSectors;
1318 return (0);
1319
1320 /* fail a disk & optionally start reconstruction */
1321 case RAIDFRAME_FAIL_DISK:
1322
1323 if (raidPtr->Layout.map->faultsTolerated == 0) {
1324 /* Can't do this on a RAID 0!! */
1325 return(EINVAL);
1326 }
1327
1328 rr = (struct rf_recon_req *) data;
1329 rr->row = 0;
1330 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1331 return (EINVAL);
1332
1333
1334 RF_LOCK_MUTEX(raidPtr->mutex);
1335 if (raidPtr->status == rf_rs_reconstructing) {
1336 /* you can't fail a disk while we're reconstructing! */
1337 /* XXX wrong for RAID6 */
1338 RF_UNLOCK_MUTEX(raidPtr->mutex);
1339 return (EINVAL);
1340 }
1341 if ((raidPtr->Disks[rr->col].status ==
1342 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1343 /* some other component has failed. Let's not make
1344 things worse. XXX wrong for RAID6 */
1345 RF_UNLOCK_MUTEX(raidPtr->mutex);
1346 return (EINVAL);
1347 }
1348 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1349 /* Can't fail a spared disk! */
1350 RF_UNLOCK_MUTEX(raidPtr->mutex);
1351 return (EINVAL);
1352 }
1353 RF_UNLOCK_MUTEX(raidPtr->mutex);
1354
1355 /* make a copy of the recon request so that we don't rely on
1356 * the user's buffer */
1357 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1358 if (rrcopy == NULL)
1359 return(ENOMEM);
1360 memcpy(rrcopy, rr, sizeof(*rr));
1361 rrcopy->raidPtr = (void *) raidPtr;
1362
1363 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1364 rf_ReconThread,
1365 rrcopy,"raid_recon");
1366 return (0);
1367
1368 /* invoke a copyback operation after recon on whatever disk
1369 * needs it, if any */
1370 case RAIDFRAME_COPYBACK:
1371
1372 if (raidPtr->Layout.map->faultsTolerated == 0) {
1373 /* This makes no sense on a RAID 0!! */
1374 return(EINVAL);
1375 }
1376
1377 if (raidPtr->copyback_in_progress == 1) {
1378 /* Copyback is already in progress! */
1379 return(EINVAL);
1380 }
1381
1382 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1383 rf_CopybackThread,
1384 raidPtr,"raid_copyback");
1385 return (retcode);
1386
1387 /* return the percentage completion of reconstruction */
1388 case RAIDFRAME_CHECK_RECON_STATUS:
1389 if (raidPtr->Layout.map->faultsTolerated == 0) {
1390 /* This makes no sense on a RAID 0, so tell the
1391 user it's done. */
1392 *(int *) data = 100;
1393 return(0);
1394 }
1395 if (raidPtr->status != rf_rs_reconstructing)
1396 *(int *) data = 100;
1397 else {
1398 if (raidPtr->reconControl->numRUsTotal > 0) {
1399 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1400 } else {
1401 *(int *) data = 0;
1402 }
1403 }
1404 return (0);
1405 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1406 progressInfoPtr = (RF_ProgressInfo_t **) data;
1407 if (raidPtr->status != rf_rs_reconstructing) {
1408 progressInfo.remaining = 0;
1409 progressInfo.completed = 100;
1410 progressInfo.total = 100;
1411 } else {
1412 progressInfo.total =
1413 raidPtr->reconControl->numRUsTotal;
1414 progressInfo.completed =
1415 raidPtr->reconControl->numRUsComplete;
1416 progressInfo.remaining = progressInfo.total -
1417 progressInfo.completed;
1418 }
1419 retcode = copyout(&progressInfo, *progressInfoPtr,
1420 sizeof(RF_ProgressInfo_t));
1421 return (retcode);
1422
1423 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1424 if (raidPtr->Layout.map->faultsTolerated == 0) {
1425 /* This makes no sense on a RAID 0, so tell the
1426 user it's done. */
1427 *(int *) data = 100;
1428 return(0);
1429 }
1430 if (raidPtr->parity_rewrite_in_progress == 1) {
1431 *(int *) data = 100 *
1432 raidPtr->parity_rewrite_stripes_done /
1433 raidPtr->Layout.numStripe;
1434 } else {
1435 *(int *) data = 100;
1436 }
1437 return (0);
1438
1439 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1440 progressInfoPtr = (RF_ProgressInfo_t **) data;
1441 if (raidPtr->parity_rewrite_in_progress == 1) {
1442 progressInfo.total = raidPtr->Layout.numStripe;
1443 progressInfo.completed =
1444 raidPtr->parity_rewrite_stripes_done;
1445 progressInfo.remaining = progressInfo.total -
1446 progressInfo.completed;
1447 } else {
1448 progressInfo.remaining = 0;
1449 progressInfo.completed = 100;
1450 progressInfo.total = 100;
1451 }
1452 retcode = copyout(&progressInfo, *progressInfoPtr,
1453 sizeof(RF_ProgressInfo_t));
1454 return (retcode);
1455
1456 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1457 if (raidPtr->Layout.map->faultsTolerated == 0) {
1458 /* This makes no sense on a RAID 0 */
1459 *(int *) data = 100;
1460 return(0);
1461 }
1462 if (raidPtr->copyback_in_progress == 1) {
1463 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1464 raidPtr->Layout.numStripe;
1465 } else {
1466 *(int *) data = 100;
1467 }
1468 return (0);
1469
1470 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1471 progressInfoPtr = (RF_ProgressInfo_t **) data;
1472 if (raidPtr->copyback_in_progress == 1) {
1473 progressInfo.total = raidPtr->Layout.numStripe;
1474 progressInfo.completed =
1475 raidPtr->copyback_stripes_done;
1476 progressInfo.remaining = progressInfo.total -
1477 progressInfo.completed;
1478 } else {
1479 progressInfo.remaining = 0;
1480 progressInfo.completed = 100;
1481 progressInfo.total = 100;
1482 }
1483 retcode = copyout(&progressInfo, *progressInfoPtr,
1484 sizeof(RF_ProgressInfo_t));
1485 return (retcode);
1486
1487 /* the sparetable daemon calls this to wait for the kernel to
1488 * need a spare table. this ioctl does not return until a
1489 * spare table is needed. XXX -- calling mpsleep here in the
1490 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1491 * -- I should either compute the spare table in the kernel,
1492 * or have a different -- XXX XXX -- interface (a different
1493 * character device) for delivering the table -- XXX */
1494 #if 0
1495 case RAIDFRAME_SPARET_WAIT:
1496 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1497 while (!rf_sparet_wait_queue)
1498 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1499 waitreq = rf_sparet_wait_queue;
1500 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1501 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1502
1503 /* structure assignment */
1504 *((RF_SparetWait_t *) data) = *waitreq;
1505
1506 RF_Free(waitreq, sizeof(*waitreq));
1507 return (0);
1508
1509 /* wakes up a process waiting on SPARET_WAIT and puts an error
1510 * code in it that will cause the daemon to exit */
1511 case RAIDFRAME_ABORT_SPARET_WAIT:
1512 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1513 waitreq->fcol = -1;
1514 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1515 waitreq->next = rf_sparet_wait_queue;
1516 rf_sparet_wait_queue = waitreq;
1517 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1518 wakeup(&rf_sparet_wait_queue);
1519 return (0);
1520
1521 /* used by the spare table daemon to deliver a spare table
1522 * into the kernel */
1523 case RAIDFRAME_SEND_SPARET:
1524
1525 /* install the spare table */
1526 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1527
1528 /* respond to the requestor. the return status of the spare
1529 * table installation is passed in the "fcol" field */
1530 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1531 waitreq->fcol = retcode;
1532 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1533 waitreq->next = rf_sparet_resp_queue;
1534 rf_sparet_resp_queue = waitreq;
1535 wakeup(&rf_sparet_resp_queue);
1536 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1537
1538 return (retcode);
1539 #endif
1540
1541 default:
1542 break; /* fall through to the os-specific code below */
1543
1544 }
1545
1546 if (!raidPtr->valid)
1547 return (EINVAL);
1548
1549 /*
1550 * Add support for "regular" device ioctls here.
1551 */
1552
1553 switch (cmd) {
1554 case DIOCGDINFO:
1555 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1556 break;
1557 #ifdef __HAVE_OLD_DISKLABEL
1558 case ODIOCGDINFO:
1559 newlabel = *(rs->sc_dkdev.dk_label);
1560 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1561 return ENOTTY;
1562 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1563 break;
1564 #endif
1565
1566 case DIOCGPART:
1567 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1568 ((struct partinfo *) data)->part =
1569 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1570 break;
1571
1572 case DIOCWDINFO:
1573 case DIOCSDINFO:
1574 #ifdef __HAVE_OLD_DISKLABEL
1575 case ODIOCWDINFO:
1576 case ODIOCSDINFO:
1577 #endif
1578 {
1579 struct disklabel *lp;
1580 #ifdef __HAVE_OLD_DISKLABEL
1581 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1582 memset(&newlabel, 0, sizeof newlabel);
1583 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1584 lp = &newlabel;
1585 } else
1586 #endif
1587 lp = (struct disklabel *)data;
1588
1589 if ((error = raidlock(rs)) != 0)
1590 return (error);
1591
1592 rs->sc_flags |= RAIDF_LABELLING;
1593
1594 error = setdisklabel(rs->sc_dkdev.dk_label,
1595 lp, 0, rs->sc_dkdev.dk_cpulabel);
1596 if (error == 0) {
1597 if (cmd == DIOCWDINFO
1598 #ifdef __HAVE_OLD_DISKLABEL
1599 || cmd == ODIOCWDINFO
1600 #endif
1601 )
1602 error = writedisklabel(RAIDLABELDEV(dev),
1603 raidstrategy, rs->sc_dkdev.dk_label,
1604 rs->sc_dkdev.dk_cpulabel);
1605 }
1606 rs->sc_flags &= ~RAIDF_LABELLING;
1607
1608 raidunlock(rs);
1609
1610 if (error)
1611 return (error);
1612 break;
1613 }
1614
1615 case DIOCWLABEL:
1616 if (*(int *) data != 0)
1617 rs->sc_flags |= RAIDF_WLABEL;
1618 else
1619 rs->sc_flags &= ~RAIDF_WLABEL;
1620 break;
1621
1622 case DIOCGDEFLABEL:
1623 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1624 break;
1625
1626 #ifdef __HAVE_OLD_DISKLABEL
1627 case ODIOCGDEFLABEL:
1628 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1629 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1630 return ENOTTY;
1631 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1632 break;
1633 #endif
1634
1635 case DIOCAWEDGE:
1636 case DIOCDWEDGE:
1637 dkw = (void *)data;
1638
1639 /* If the ioctl happens here, the parent is us. */
1640 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1641 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1642
1643 case DIOCLWEDGES:
1644 return dkwedge_list(&rs->sc_dkdev,
1645 (struct dkwedge_list *)data, l);
1646
1647 default:
1648 retcode = ENOTTY;
1649 }
1650 return (retcode);
1651
1652 }
1653
1654
1655 /* raidinit -- complete the rest of the initialization for the
1656 RAIDframe device. */
1657
1658
1659 static void
1660 raidinit(RF_Raid_t *raidPtr)
1661 {
1662 struct cfdata *cf;
1663 struct raid_softc *rs;
1664 int unit;
1665
1666 unit = raidPtr->raidid;
1667
1668 rs = &raid_softc[unit];
1669
1670 /* XXX should check return code first... */
1671 rs->sc_flags |= RAIDF_INITED;
1672
1673 /* XXX doesn't check bounds. */
1674 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1675
1676 rs->sc_dkdev.dk_name = rs->sc_xname;
1677
1678 /* attach the pseudo device */
1679 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1680 cf->cf_name = raid_cd.cd_name;
1681 cf->cf_atname = raid_cd.cd_name;
1682 cf->cf_unit = unit;
1683 cf->cf_fstate = FSTATE_STAR;
1684
1685 rs->sc_dev = config_attach_pseudo(cf);
1686
1687 if (rs->sc_dev==NULL) {
1688 printf("raid%d: config_attach_pseudo failed\n",
1689 raidPtr->raidid);
1690 }
1691
1692 /* disk_attach actually creates space for the CPU disklabel, among
1693 * other things, so it's critical to call this *BEFORE* we try putzing
1694 * with disklabels. */
1695
1696 disk_attach(&rs->sc_dkdev);
1697
1698 /* XXX There may be a weird interaction here between this, and
1699 * protectedSectors, as used in RAIDframe. */
1700
1701 rs->sc_size = raidPtr->totalSectors;
1702 }
1703 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1704 /* wake up the daemon & tell it to get us a spare table
1705 * XXX
1706 * the entries in the queues should be tagged with the raidPtr
1707 * so that in the extremely rare case that two recons happen at once,
1708 * we know for which device were requesting a spare table
1709 * XXX
1710 *
1711 * XXX This code is not currently used. GO
1712 */
1713 int
1714 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1715 {
1716 int retcode;
1717
1718 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1719 req->next = rf_sparet_wait_queue;
1720 rf_sparet_wait_queue = req;
1721 wakeup(&rf_sparet_wait_queue);
1722
1723 /* mpsleep unlocks the mutex */
1724 while (!rf_sparet_resp_queue) {
1725 tsleep(&rf_sparet_resp_queue, PRIBIO,
1726 "raidframe getsparetable", 0);
1727 }
1728 req = rf_sparet_resp_queue;
1729 rf_sparet_resp_queue = req->next;
1730 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1731
1732 retcode = req->fcol;
1733 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1734 * alloc'd */
1735 return (retcode);
1736 }
1737 #endif
1738
1739 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1740 * bp & passes it down.
1741 * any calls originating in the kernel must use non-blocking I/O
1742 * do some extra sanity checking to return "appropriate" error values for
1743 * certain conditions (to make some standard utilities work)
1744 *
1745 * Formerly known as: rf_DoAccessKernel
1746 */
1747 void
1748 raidstart(RF_Raid_t *raidPtr)
1749 {
1750 RF_SectorCount_t num_blocks, pb, sum;
1751 RF_RaidAddr_t raid_addr;
1752 struct partition *pp;
1753 daddr_t blocknum;
1754 int unit;
1755 struct raid_softc *rs;
1756 int do_async;
1757 struct buf *bp;
1758 int rc;
1759
1760 unit = raidPtr->raidid;
1761 rs = &raid_softc[unit];
1762
1763 /* quick check to see if anything has died recently */
1764 RF_LOCK_MUTEX(raidPtr->mutex);
1765 if (raidPtr->numNewFailures > 0) {
1766 RF_UNLOCK_MUTEX(raidPtr->mutex);
1767 rf_update_component_labels(raidPtr,
1768 RF_NORMAL_COMPONENT_UPDATE);
1769 RF_LOCK_MUTEX(raidPtr->mutex);
1770 raidPtr->numNewFailures--;
1771 }
1772
1773 /* Check to see if we're at the limit... */
1774 while (raidPtr->openings > 0) {
1775 RF_UNLOCK_MUTEX(raidPtr->mutex);
1776
1777 /* get the next item, if any, from the queue */
1778 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1779 /* nothing more to do */
1780 return;
1781 }
1782
1783 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1784 * partition.. Need to make it absolute to the underlying
1785 * device.. */
1786
1787 blocknum = bp->b_blkno;
1788 if (DISKPART(bp->b_dev) != RAW_PART) {
1789 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1790 blocknum += pp->p_offset;
1791 }
1792
1793 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1794 (int) blocknum));
1795
1796 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1797 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1798
1799 /* *THIS* is where we adjust what block we're going to...
1800 * but DO NOT TOUCH bp->b_blkno!!! */
1801 raid_addr = blocknum;
1802
1803 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1804 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1805 sum = raid_addr + num_blocks + pb;
1806 if (1 || rf_debugKernelAccess) {
1807 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1808 (int) raid_addr, (int) sum, (int) num_blocks,
1809 (int) pb, (int) bp->b_resid));
1810 }
1811 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1812 || (sum < num_blocks) || (sum < pb)) {
1813 bp->b_error = ENOSPC;
1814 bp->b_flags |= B_ERROR;
1815 bp->b_resid = bp->b_bcount;
1816 biodone(bp);
1817 RF_LOCK_MUTEX(raidPtr->mutex);
1818 continue;
1819 }
1820 /*
1821 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1822 */
1823
1824 if (bp->b_bcount & raidPtr->sectorMask) {
1825 bp->b_error = EINVAL;
1826 bp->b_flags |= B_ERROR;
1827 bp->b_resid = bp->b_bcount;
1828 biodone(bp);
1829 RF_LOCK_MUTEX(raidPtr->mutex);
1830 continue;
1831
1832 }
1833 db1_printf(("Calling DoAccess..\n"));
1834
1835
1836 RF_LOCK_MUTEX(raidPtr->mutex);
1837 raidPtr->openings--;
1838 RF_UNLOCK_MUTEX(raidPtr->mutex);
1839
1840 /*
1841 * Everything is async.
1842 */
1843 do_async = 1;
1844
1845 disk_busy(&rs->sc_dkdev);
1846
1847 /* XXX we're still at splbio() here... do we *really*
1848 need to be? */
1849
1850 /* don't ever condition on bp->b_flags & B_WRITE.
1851 * always condition on B_READ instead */
1852
1853 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1854 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1855 do_async, raid_addr, num_blocks,
1856 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1857
1858 if (rc) {
1859 bp->b_error = rc;
1860 bp->b_flags |= B_ERROR;
1861 bp->b_resid = bp->b_bcount;
1862 biodone(bp);
1863 /* continue loop */
1864 }
1865
1866 RF_LOCK_MUTEX(raidPtr->mutex);
1867 }
1868 RF_UNLOCK_MUTEX(raidPtr->mutex);
1869 }
1870
1871
1872
1873
1874 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1875
1876 int
1877 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1878 {
1879 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1880 struct buf *bp;
1881
1882 req->queue = queue;
1883
1884 #if DIAGNOSTIC
1885 if (queue->raidPtr->raidid >= numraid) {
1886 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1887 numraid);
1888 panic("Invalid Unit number in rf_DispatchKernelIO");
1889 }
1890 #endif
1891
1892 bp = req->bp;
1893
1894 switch (req->type) {
1895 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1896 /* XXX need to do something extra here.. */
1897 /* I'm leaving this in, as I've never actually seen it used,
1898 * and I'd like folks to report it... GO */
1899 printf(("WAKEUP CALLED\n"));
1900 queue->numOutstanding++;
1901
1902 bp->b_flags = 0;
1903 bp->b_private = req;
1904
1905 KernelWakeupFunc(bp);
1906 break;
1907
1908 case RF_IO_TYPE_READ:
1909 case RF_IO_TYPE_WRITE:
1910 #if RF_ACC_TRACE > 0
1911 if (req->tracerec) {
1912 RF_ETIMER_START(req->tracerec->timer);
1913 }
1914 #endif
1915 InitBP(bp, queue->rf_cinfo->ci_vp,
1916 op, queue->rf_cinfo->ci_dev,
1917 req->sectorOffset, req->numSector,
1918 req->buf, KernelWakeupFunc, (void *) req,
1919 queue->raidPtr->logBytesPerSector, req->b_proc);
1920
1921 if (rf_debugKernelAccess) {
1922 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1923 (long) bp->b_blkno));
1924 }
1925 queue->numOutstanding++;
1926 queue->last_deq_sector = req->sectorOffset;
1927 /* acc wouldn't have been let in if there were any pending
1928 * reqs at any other priority */
1929 queue->curPriority = req->priority;
1930
1931 db1_printf(("Going for %c to unit %d col %d\n",
1932 req->type, queue->raidPtr->raidid,
1933 queue->col));
1934 db1_printf(("sector %d count %d (%d bytes) %d\n",
1935 (int) req->sectorOffset, (int) req->numSector,
1936 (int) (req->numSector <<
1937 queue->raidPtr->logBytesPerSector),
1938 (int) queue->raidPtr->logBytesPerSector));
1939 VOP_STRATEGY(bp->b_vp, bp);
1940
1941 break;
1942
1943 default:
1944 panic("bad req->type in rf_DispatchKernelIO");
1945 }
1946 db1_printf(("Exiting from DispatchKernelIO\n"));
1947
1948 return (0);
1949 }
1950 /* this is the callback function associated with a I/O invoked from
1951 kernel code.
1952 */
1953 static void
1954 KernelWakeupFunc(struct buf *bp)
1955 {
1956 RF_DiskQueueData_t *req = NULL;
1957 RF_DiskQueue_t *queue;
1958 int s;
1959
1960 s = splbio();
1961 db1_printf(("recovering the request queue:\n"));
1962 req = bp->b_private;
1963
1964 queue = (RF_DiskQueue_t *) req->queue;
1965
1966 #if RF_ACC_TRACE > 0
1967 if (req->tracerec) {
1968 RF_ETIMER_STOP(req->tracerec->timer);
1969 RF_ETIMER_EVAL(req->tracerec->timer);
1970 RF_LOCK_MUTEX(rf_tracing_mutex);
1971 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1972 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1973 req->tracerec->num_phys_ios++;
1974 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1975 }
1976 #endif
1977
1978 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1979 * ballistic, and mark the component as hosed... */
1980
1981 if (bp->b_flags & B_ERROR) {
1982 /* Mark the disk as dead */
1983 /* but only mark it once... */
1984 /* and only if it wouldn't leave this RAID set
1985 completely broken */
1986 if (((queue->raidPtr->Disks[queue->col].status ==
1987 rf_ds_optimal) ||
1988 (queue->raidPtr->Disks[queue->col].status ==
1989 rf_ds_used_spare)) &&
1990 (queue->raidPtr->numFailures <
1991 queue->raidPtr->Layout.map->faultsTolerated)) {
1992 printf("raid%d: IO Error. Marking %s as failed.\n",
1993 queue->raidPtr->raidid,
1994 queue->raidPtr->Disks[queue->col].devname);
1995 queue->raidPtr->Disks[queue->col].status =
1996 rf_ds_failed;
1997 queue->raidPtr->status = rf_rs_degraded;
1998 queue->raidPtr->numFailures++;
1999 queue->raidPtr->numNewFailures++;
2000 } else { /* Disk is already dead... */
2001 /* printf("Disk already marked as dead!\n"); */
2002 }
2003
2004 }
2005
2006 /* Fill in the error value */
2007
2008 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
2009
2010 simple_lock(&queue->raidPtr->iodone_lock);
2011
2012 /* Drop this one on the "finished" queue... */
2013 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2014
2015 /* Let the raidio thread know there is work to be done. */
2016 wakeup(&(queue->raidPtr->iodone));
2017
2018 simple_unlock(&queue->raidPtr->iodone_lock);
2019
2020 splx(s);
2021 }
2022
2023
2024
2025 /*
2026 * initialize a buf structure for doing an I/O in the kernel.
2027 */
2028 static void
2029 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2030 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
2031 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2032 struct proc *b_proc)
2033 {
2034 /* bp->b_flags = B_PHYS | rw_flag; */
2035 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2036 bp->b_bcount = numSect << logBytesPerSector;
2037 bp->b_bufsize = bp->b_bcount;
2038 bp->b_error = 0;
2039 bp->b_dev = dev;
2040 bp->b_data = bf;
2041 bp->b_blkno = startSect;
2042 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2043 if (bp->b_bcount == 0) {
2044 panic("bp->b_bcount is zero in InitBP!!");
2045 }
2046 bp->b_proc = b_proc;
2047 bp->b_iodone = cbFunc;
2048 bp->b_private = cbArg;
2049 bp->b_vp = b_vp;
2050 if ((bp->b_flags & B_READ) == 0) {
2051 bp->b_vp->v_numoutput++;
2052 }
2053
2054 }
2055
2056 static void
2057 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2058 struct disklabel *lp)
2059 {
2060 memset(lp, 0, sizeof(*lp));
2061
2062 /* fabricate a label... */
2063 lp->d_secperunit = raidPtr->totalSectors;
2064 lp->d_secsize = raidPtr->bytesPerSector;
2065 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2066 lp->d_ntracks = 4 * raidPtr->numCol;
2067 lp->d_ncylinders = raidPtr->totalSectors /
2068 (lp->d_nsectors * lp->d_ntracks);
2069 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2070
2071 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2072 lp->d_type = DTYPE_RAID;
2073 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2074 lp->d_rpm = 3600;
2075 lp->d_interleave = 1;
2076 lp->d_flags = 0;
2077
2078 lp->d_partitions[RAW_PART].p_offset = 0;
2079 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2080 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2081 lp->d_npartitions = RAW_PART + 1;
2082
2083 lp->d_magic = DISKMAGIC;
2084 lp->d_magic2 = DISKMAGIC;
2085 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2086
2087 }
2088 /*
2089 * Read the disklabel from the raid device. If one is not present, fake one
2090 * up.
2091 */
2092 static void
2093 raidgetdisklabel(dev_t dev)
2094 {
2095 int unit = raidunit(dev);
2096 struct raid_softc *rs = &raid_softc[unit];
2097 const char *errstring;
2098 struct disklabel *lp = rs->sc_dkdev.dk_label;
2099 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2100 RF_Raid_t *raidPtr;
2101
2102 db1_printf(("Getting the disklabel...\n"));
2103
2104 memset(clp, 0, sizeof(*clp));
2105
2106 raidPtr = raidPtrs[unit];
2107
2108 raidgetdefaultlabel(raidPtr, rs, lp);
2109
2110 /*
2111 * Call the generic disklabel extraction routine.
2112 */
2113 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2114 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2115 if (errstring)
2116 raidmakedisklabel(rs);
2117 else {
2118 int i;
2119 struct partition *pp;
2120
2121 /*
2122 * Sanity check whether the found disklabel is valid.
2123 *
2124 * This is necessary since total size of the raid device
2125 * may vary when an interleave is changed even though exactly
2126 * same components are used, and old disklabel may used
2127 * if that is found.
2128 */
2129 if (lp->d_secperunit != rs->sc_size)
2130 printf("raid%d: WARNING: %s: "
2131 "total sector size in disklabel (%d) != "
2132 "the size of raid (%ld)\n", unit, rs->sc_xname,
2133 lp->d_secperunit, (long) rs->sc_size);
2134 for (i = 0; i < lp->d_npartitions; i++) {
2135 pp = &lp->d_partitions[i];
2136 if (pp->p_offset + pp->p_size > rs->sc_size)
2137 printf("raid%d: WARNING: %s: end of partition `%c' "
2138 "exceeds the size of raid (%ld)\n",
2139 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2140 }
2141 }
2142
2143 }
2144 /*
2145 * Take care of things one might want to take care of in the event
2146 * that a disklabel isn't present.
2147 */
2148 static void
2149 raidmakedisklabel(struct raid_softc *rs)
2150 {
2151 struct disklabel *lp = rs->sc_dkdev.dk_label;
2152 db1_printf(("Making a label..\n"));
2153
2154 /*
2155 * For historical reasons, if there's no disklabel present
2156 * the raw partition must be marked FS_BSDFFS.
2157 */
2158
2159 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2160
2161 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2162
2163 lp->d_checksum = dkcksum(lp);
2164 }
2165 /*
2166 * Wait interruptibly for an exclusive lock.
2167 *
2168 * XXX
2169 * Several drivers do this; it should be abstracted and made MP-safe.
2170 * (Hmm... where have we seen this warning before :-> GO )
2171 */
2172 static int
2173 raidlock(struct raid_softc *rs)
2174 {
2175 int error;
2176
2177 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2178 rs->sc_flags |= RAIDF_WANTED;
2179 if ((error =
2180 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2181 return (error);
2182 }
2183 rs->sc_flags |= RAIDF_LOCKED;
2184 return (0);
2185 }
2186 /*
2187 * Unlock and wake up any waiters.
2188 */
2189 static void
2190 raidunlock(struct raid_softc *rs)
2191 {
2192
2193 rs->sc_flags &= ~RAIDF_LOCKED;
2194 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2195 rs->sc_flags &= ~RAIDF_WANTED;
2196 wakeup(rs);
2197 }
2198 }
2199
2200
2201 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2202 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2203
2204 int
2205 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2206 {
2207 RF_ComponentLabel_t clabel;
2208 raidread_component_label(dev, b_vp, &clabel);
2209 clabel.mod_counter = mod_counter;
2210 clabel.clean = RF_RAID_CLEAN;
2211 raidwrite_component_label(dev, b_vp, &clabel);
2212 return(0);
2213 }
2214
2215
2216 int
2217 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2218 {
2219 RF_ComponentLabel_t clabel;
2220 raidread_component_label(dev, b_vp, &clabel);
2221 clabel.mod_counter = mod_counter;
2222 clabel.clean = RF_RAID_DIRTY;
2223 raidwrite_component_label(dev, b_vp, &clabel);
2224 return(0);
2225 }
2226
2227 /* ARGSUSED */
2228 int
2229 raidread_component_label(dev_t dev, struct vnode *b_vp,
2230 RF_ComponentLabel_t *clabel)
2231 {
2232 struct buf *bp;
2233 const struct bdevsw *bdev;
2234 int error;
2235
2236 /* XXX should probably ensure that we don't try to do this if
2237 someone has changed rf_protected_sectors. */
2238
2239 if (b_vp == NULL) {
2240 /* For whatever reason, this component is not valid.
2241 Don't try to read a component label from it. */
2242 return(EINVAL);
2243 }
2244
2245 /* get a block of the appropriate size... */
2246 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2247 bp->b_dev = dev;
2248
2249 /* get our ducks in a row for the read */
2250 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2251 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2252 bp->b_flags |= B_READ;
2253 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2254
2255 bdev = bdevsw_lookup(bp->b_dev);
2256 if (bdev == NULL)
2257 return (ENXIO);
2258 (*bdev->d_strategy)(bp);
2259
2260 error = biowait(bp);
2261
2262 if (!error) {
2263 memcpy(clabel, bp->b_data,
2264 sizeof(RF_ComponentLabel_t));
2265 }
2266
2267 brelse(bp);
2268 return(error);
2269 }
2270 /* ARGSUSED */
2271 int
2272 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2273 RF_ComponentLabel_t *clabel)
2274 {
2275 struct buf *bp;
2276 const struct bdevsw *bdev;
2277 int error;
2278
2279 /* get a block of the appropriate size... */
2280 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2281 bp->b_dev = dev;
2282
2283 /* get our ducks in a row for the write */
2284 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2285 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2286 bp->b_flags |= B_WRITE;
2287 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2288
2289 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2290
2291 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2292
2293 bdev = bdevsw_lookup(bp->b_dev);
2294 if (bdev == NULL)
2295 return (ENXIO);
2296 (*bdev->d_strategy)(bp);
2297 error = biowait(bp);
2298 brelse(bp);
2299 if (error) {
2300 #if 1
2301 printf("Failed to write RAID component info!\n");
2302 #endif
2303 }
2304
2305 return(error);
2306 }
2307
2308 void
2309 rf_markalldirty(RF_Raid_t *raidPtr)
2310 {
2311 RF_ComponentLabel_t clabel;
2312 int sparecol;
2313 int c;
2314 int j;
2315 int scol = -1;
2316
2317 raidPtr->mod_counter++;
2318 for (c = 0; c < raidPtr->numCol; c++) {
2319 /* we don't want to touch (at all) a disk that has
2320 failed */
2321 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2322 raidread_component_label(
2323 raidPtr->Disks[c].dev,
2324 raidPtr->raid_cinfo[c].ci_vp,
2325 &clabel);
2326 if (clabel.status == rf_ds_spared) {
2327 /* XXX do something special...
2328 but whatever you do, don't
2329 try to access it!! */
2330 } else {
2331 raidmarkdirty(
2332 raidPtr->Disks[c].dev,
2333 raidPtr->raid_cinfo[c].ci_vp,
2334 raidPtr->mod_counter);
2335 }
2336 }
2337 }
2338
2339 for( c = 0; c < raidPtr->numSpare ; c++) {
2340 sparecol = raidPtr->numCol + c;
2341 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2342 /*
2343
2344 we claim this disk is "optimal" if it's
2345 rf_ds_used_spare, as that means it should be
2346 directly substitutable for the disk it replaced.
2347 We note that too...
2348
2349 */
2350
2351 for(j=0;j<raidPtr->numCol;j++) {
2352 if (raidPtr->Disks[j].spareCol == sparecol) {
2353 scol = j;
2354 break;
2355 }
2356 }
2357
2358 raidread_component_label(
2359 raidPtr->Disks[sparecol].dev,
2360 raidPtr->raid_cinfo[sparecol].ci_vp,
2361 &clabel);
2362 /* make sure status is noted */
2363
2364 raid_init_component_label(raidPtr, &clabel);
2365
2366 clabel.row = 0;
2367 clabel.column = scol;
2368 /* Note: we *don't* change status from rf_ds_used_spare
2369 to rf_ds_optimal */
2370 /* clabel.status = rf_ds_optimal; */
2371
2372 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2373 raidPtr->raid_cinfo[sparecol].ci_vp,
2374 raidPtr->mod_counter);
2375 }
2376 }
2377 }
2378
2379
2380 void
2381 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2382 {
2383 RF_ComponentLabel_t clabel;
2384 int sparecol;
2385 int c;
2386 int j;
2387 int scol;
2388
2389 scol = -1;
2390
2391 /* XXX should do extra checks to make sure things really are clean,
2392 rather than blindly setting the clean bit... */
2393
2394 raidPtr->mod_counter++;
2395
2396 for (c = 0; c < raidPtr->numCol; c++) {
2397 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2398 raidread_component_label(
2399 raidPtr->Disks[c].dev,
2400 raidPtr->raid_cinfo[c].ci_vp,
2401 &clabel);
2402 /* make sure status is noted */
2403 clabel.status = rf_ds_optimal;
2404
2405 /* bump the counter */
2406 clabel.mod_counter = raidPtr->mod_counter;
2407
2408 /* note what unit we are configured as */
2409 clabel.last_unit = raidPtr->raidid;
2410
2411 raidwrite_component_label(
2412 raidPtr->Disks[c].dev,
2413 raidPtr->raid_cinfo[c].ci_vp,
2414 &clabel);
2415 if (final == RF_FINAL_COMPONENT_UPDATE) {
2416 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2417 raidmarkclean(
2418 raidPtr->Disks[c].dev,
2419 raidPtr->raid_cinfo[c].ci_vp,
2420 raidPtr->mod_counter);
2421 }
2422 }
2423 }
2424 /* else we don't touch it.. */
2425 }
2426
2427 for( c = 0; c < raidPtr->numSpare ; c++) {
2428 sparecol = raidPtr->numCol + c;
2429 /* Need to ensure that the reconstruct actually completed! */
2430 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2431 /*
2432
2433 we claim this disk is "optimal" if it's
2434 rf_ds_used_spare, as that means it should be
2435 directly substitutable for the disk it replaced.
2436 We note that too...
2437
2438 */
2439
2440 for(j=0;j<raidPtr->numCol;j++) {
2441 if (raidPtr->Disks[j].spareCol == sparecol) {
2442 scol = j;
2443 break;
2444 }
2445 }
2446
2447 /* XXX shouldn't *really* need this... */
2448 raidread_component_label(
2449 raidPtr->Disks[sparecol].dev,
2450 raidPtr->raid_cinfo[sparecol].ci_vp,
2451 &clabel);
2452 /* make sure status is noted */
2453
2454 raid_init_component_label(raidPtr, &clabel);
2455
2456 clabel.mod_counter = raidPtr->mod_counter;
2457 clabel.column = scol;
2458 clabel.status = rf_ds_optimal;
2459 clabel.last_unit = raidPtr->raidid;
2460
2461 raidwrite_component_label(
2462 raidPtr->Disks[sparecol].dev,
2463 raidPtr->raid_cinfo[sparecol].ci_vp,
2464 &clabel);
2465 if (final == RF_FINAL_COMPONENT_UPDATE) {
2466 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2467 raidmarkclean( raidPtr->Disks[sparecol].dev,
2468 raidPtr->raid_cinfo[sparecol].ci_vp,
2469 raidPtr->mod_counter);
2470 }
2471 }
2472 }
2473 }
2474 }
2475
2476 void
2477 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2478 {
2479 struct proc *p;
2480 struct lwp *l;
2481
2482 p = raidPtr->engine_thread;
2483 l = LIST_FIRST(&p->p_lwps);
2484
2485 if (vp != NULL) {
2486 if (auto_configured == 1) {
2487 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2488 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2489 vput(vp);
2490
2491 } else {
2492 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2493 }
2494 }
2495 }
2496
2497
2498 void
2499 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2500 {
2501 int r,c;
2502 struct vnode *vp;
2503 int acd;
2504
2505
2506 /* We take this opportunity to close the vnodes like we should.. */
2507
2508 for (c = 0; c < raidPtr->numCol; c++) {
2509 vp = raidPtr->raid_cinfo[c].ci_vp;
2510 acd = raidPtr->Disks[c].auto_configured;
2511 rf_close_component(raidPtr, vp, acd);
2512 raidPtr->raid_cinfo[c].ci_vp = NULL;
2513 raidPtr->Disks[c].auto_configured = 0;
2514 }
2515
2516 for (r = 0; r < raidPtr->numSpare; r++) {
2517 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2518 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2519 rf_close_component(raidPtr, vp, acd);
2520 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2521 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2522 }
2523 }
2524
2525
2526 void
2527 rf_ReconThread(struct rf_recon_req *req)
2528 {
2529 int s;
2530 RF_Raid_t *raidPtr;
2531
2532 s = splbio();
2533 raidPtr = (RF_Raid_t *) req->raidPtr;
2534 raidPtr->recon_in_progress = 1;
2535
2536 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2537 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2538
2539 RF_Free(req, sizeof(*req));
2540
2541 raidPtr->recon_in_progress = 0;
2542 splx(s);
2543
2544 /* That's all... */
2545 kthread_exit(0); /* does not return */
2546 }
2547
2548 void
2549 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2550 {
2551 int retcode;
2552 int s;
2553
2554 raidPtr->parity_rewrite_stripes_done = 0;
2555 raidPtr->parity_rewrite_in_progress = 1;
2556 s = splbio();
2557 retcode = rf_RewriteParity(raidPtr);
2558 splx(s);
2559 if (retcode) {
2560 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2561 } else {
2562 /* set the clean bit! If we shutdown correctly,
2563 the clean bit on each component label will get
2564 set */
2565 raidPtr->parity_good = RF_RAID_CLEAN;
2566 }
2567 raidPtr->parity_rewrite_in_progress = 0;
2568
2569 /* Anyone waiting for us to stop? If so, inform them... */
2570 if (raidPtr->waitShutdown) {
2571 wakeup(&raidPtr->parity_rewrite_in_progress);
2572 }
2573
2574 /* That's all... */
2575 kthread_exit(0); /* does not return */
2576 }
2577
2578
2579 void
2580 rf_CopybackThread(RF_Raid_t *raidPtr)
2581 {
2582 int s;
2583
2584 raidPtr->copyback_in_progress = 1;
2585 s = splbio();
2586 rf_CopybackReconstructedData(raidPtr);
2587 splx(s);
2588 raidPtr->copyback_in_progress = 0;
2589
2590 /* That's all... */
2591 kthread_exit(0); /* does not return */
2592 }
2593
2594
2595 void
2596 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2597 {
2598 int s;
2599 RF_Raid_t *raidPtr;
2600
2601 s = splbio();
2602 raidPtr = req->raidPtr;
2603 raidPtr->recon_in_progress = 1;
2604 rf_ReconstructInPlace(raidPtr, req->col);
2605 RF_Free(req, sizeof(*req));
2606 raidPtr->recon_in_progress = 0;
2607 splx(s);
2608
2609 /* That's all... */
2610 kthread_exit(0); /* does not return */
2611 }
2612
2613 static RF_AutoConfig_t *
2614 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2615 const char *cname, RF_SectorCount_t size)
2616 {
2617 int good_one = 0;
2618 RF_ComponentLabel_t *clabel;
2619 RF_AutoConfig_t *ac;
2620
2621 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2622 if (clabel == NULL) {
2623 oomem:
2624 while(ac_list) {
2625 ac = ac_list;
2626 if (ac->clabel)
2627 free(ac->clabel, M_RAIDFRAME);
2628 ac_list = ac_list->next;
2629 free(ac, M_RAIDFRAME);
2630 }
2631 printf("RAID auto config: out of memory!\n");
2632 return NULL; /* XXX probably should panic? */
2633 }
2634
2635 if (!raidread_component_label(dev, vp, clabel)) {
2636 /* Got the label. Does it look reasonable? */
2637 if (rf_reasonable_label(clabel) &&
2638 (clabel->partitionSize <= size)) {
2639 #if DEBUG
2640 printf("Component on: %s: %llu\n",
2641 cname, (unsigned long long)size);
2642 rf_print_component_label(clabel);
2643 #endif
2644 /* if it's reasonable, add it, else ignore it. */
2645 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2646 M_NOWAIT);
2647 if (ac == NULL) {
2648 free(clabel, M_RAIDFRAME);
2649 goto oomem;
2650 }
2651 strlcpy(ac->devname, cname, sizeof(ac->devname));
2652 ac->dev = dev;
2653 ac->vp = vp;
2654 ac->clabel = clabel;
2655 ac->next = ac_list;
2656 ac_list = ac;
2657 good_one = 1;
2658 }
2659 }
2660 if (!good_one) {
2661 /* cleanup */
2662 free(clabel, M_RAIDFRAME);
2663 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2664 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2665 vput(vp);
2666 }
2667 return ac_list;
2668 }
2669
2670 RF_AutoConfig_t *
2671 rf_find_raid_components()
2672 {
2673 struct vnode *vp;
2674 struct disklabel label;
2675 struct device *dv;
2676 dev_t dev;
2677 int bmajor, bminor, wedge;
2678 int error;
2679 int i;
2680 RF_AutoConfig_t *ac_list;
2681
2682
2683 /* initialize the AutoConfig list */
2684 ac_list = NULL;
2685
2686 /* we begin by trolling through *all* the devices on the system */
2687
2688 for (dv = alldevs.tqh_first; dv != NULL;
2689 dv = dv->dv_list.tqe_next) {
2690
2691 /* we are only interested in disks... */
2692 if (device_class(dv) != DV_DISK)
2693 continue;
2694
2695 /* we don't care about floppies... */
2696 if (device_is_a(dv, "fd")) {
2697 continue;
2698 }
2699
2700 /* we don't care about CD's... */
2701 if (device_is_a(dv, "cd")) {
2702 continue;
2703 }
2704
2705 /* hdfd is the Atari/Hades floppy driver */
2706 if (device_is_a(dv, "hdfd")) {
2707 continue;
2708 }
2709
2710 /* fdisa is the Atari/Milan floppy driver */
2711 if (device_is_a(dv, "fdisa")) {
2712 continue;
2713 }
2714
2715 /* need to find the device_name_to_block_device_major stuff */
2716 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2717
2718 /* get a vnode for the raw partition of this disk */
2719
2720 wedge = device_is_a(dv, "dk");
2721 bminor = minor(device_unit(dv));
2722 dev = wedge ? makedev(bmajor, bminor) :
2723 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2724 if (bdevvp(dev, &vp))
2725 panic("RAID can't alloc vnode");
2726
2727 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2728
2729 if (error) {
2730 /* "Who cares." Continue looking
2731 for something that exists*/
2732 vput(vp);
2733 continue;
2734 }
2735
2736 if (wedge) {
2737 struct dkwedge_info dkw;
2738 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2739 NOCRED, 0);
2740 if (error) {
2741 printf("RAIDframe: can't get wedge info for "
2742 "dev %s (%d)\n", dv->dv_xname, error);
2743 out:
2744 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2745 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2746 vput(vp);
2747 continue;
2748 }
2749
2750 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
2751 goto out;
2752
2753 ac_list = rf_get_component(ac_list, dev, vp,
2754 dv->dv_xname, dkw.dkw_size);
2755 continue;
2756 }
2757
2758 /* Ok, the disk exists. Go get the disklabel. */
2759 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2760 if (error) {
2761 /*
2762 * XXX can't happen - open() would
2763 * have errored out (or faked up one)
2764 */
2765 if (error != ENOTTY)
2766 printf("RAIDframe: can't get label for dev "
2767 "%s (%d)\n", dv->dv_xname, error);
2768 }
2769
2770 /* don't need this any more. We'll allocate it again
2771 a little later if we really do... */
2772 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2773 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2774 vput(vp);
2775
2776 if (error)
2777 continue;
2778
2779 for (i = 0; i < label.d_npartitions; i++) {
2780 char cname[sizeof(ac_list->devname)];
2781
2782 /* We only support partitions marked as RAID */
2783 if (label.d_partitions[i].p_fstype != FS_RAID)
2784 continue;
2785
2786 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2787 if (bdevvp(dev, &vp))
2788 panic("RAID can't alloc vnode");
2789
2790 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2791 if (error) {
2792 /* Whatever... */
2793 vput(vp);
2794 continue;
2795 }
2796 snprintf(cname, sizeof(cname), "%s%c",
2797 dv->dv_xname, 'a' + i);
2798 ac_list = rf_get_component(ac_list, dev, vp, cname,
2799 label.d_partitions[i].p_size);
2800 }
2801 }
2802 return ac_list;
2803 }
2804
2805
2806 static int
2807 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2808 {
2809
2810 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2811 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2812 ((clabel->clean == RF_RAID_CLEAN) ||
2813 (clabel->clean == RF_RAID_DIRTY)) &&
2814 clabel->row >=0 &&
2815 clabel->column >= 0 &&
2816 clabel->num_rows > 0 &&
2817 clabel->num_columns > 0 &&
2818 clabel->row < clabel->num_rows &&
2819 clabel->column < clabel->num_columns &&
2820 clabel->blockSize > 0 &&
2821 clabel->numBlocks > 0) {
2822 /* label looks reasonable enough... */
2823 return(1);
2824 }
2825 return(0);
2826 }
2827
2828
#if DEBUG
/*
 * Debug aid: dump the fields of a component label to the console.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *clean_str = clabel->clean ? "Yes" : "No";
	const char *autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	const char *root_str = clabel->root_partition ? "Yes" : "No";

	/* position and geometry */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity */
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", clean_str, clabel->status );

	/* layout */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* auto-configuration settings */
	printf(" Autoconfig: %s\n", autoconf_str );
	printf(" Contains root partition: %s\n", root_str );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
2856
2857 RF_ConfigSet_t *
2858 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2859 {
2860 RF_AutoConfig_t *ac;
2861 RF_ConfigSet_t *config_sets;
2862 RF_ConfigSet_t *cset;
2863 RF_AutoConfig_t *ac_next;
2864
2865
2866 config_sets = NULL;
2867
2868 /* Go through the AutoConfig list, and figure out which components
2869 belong to what sets. */
2870 ac = ac_list;
2871 while(ac!=NULL) {
2872 /* we're going to putz with ac->next, so save it here
2873 for use at the end of the loop */
2874 ac_next = ac->next;
2875
2876 if (config_sets == NULL) {
2877 /* will need at least this one... */
2878 config_sets = (RF_ConfigSet_t *)
2879 malloc(sizeof(RF_ConfigSet_t),
2880 M_RAIDFRAME, M_NOWAIT);
2881 if (config_sets == NULL) {
2882 panic("rf_create_auto_sets: No memory!");
2883 }
2884 /* this one is easy :) */
2885 config_sets->ac = ac;
2886 config_sets->next = NULL;
2887 config_sets->rootable = 0;
2888 ac->next = NULL;
2889 } else {
2890 /* which set does this component fit into? */
2891 cset = config_sets;
2892 while(cset!=NULL) {
2893 if (rf_does_it_fit(cset, ac)) {
2894 /* looks like it matches... */
2895 ac->next = cset->ac;
2896 cset->ac = ac;
2897 break;
2898 }
2899 cset = cset->next;
2900 }
2901 if (cset==NULL) {
2902 /* didn't find a match above... new set..*/
2903 cset = (RF_ConfigSet_t *)
2904 malloc(sizeof(RF_ConfigSet_t),
2905 M_RAIDFRAME, M_NOWAIT);
2906 if (cset == NULL) {
2907 panic("rf_create_auto_sets: No memory!");
2908 }
2909 cset->ac = ac;
2910 ac->next = NULL;
2911 cset->next = config_sets;
2912 cset->rootable = 0;
2913 config_sets = cset;
2914 }
2915 }
2916 ac = ac_next;
2917 }
2918
2919
2920 return(config_sets);
2921 }
2922
2923 static int
2924 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2925 {
2926 RF_ComponentLabel_t *clabel1, *clabel2;
2927
2928 /* If this one matches the *first* one in the set, that's good
2929 enough, since the other members of the set would have been
2930 through here too... */
2931 /* note that we are not checking partitionSize here..
2932
2933 Note that we are also not checking the mod_counters here.
2934 If everything else matches execpt the mod_counter, that's
2935 good enough for this test. We will deal with the mod_counters
2936 a little later in the autoconfiguration process.
2937
2938 (clabel1->mod_counter == clabel2->mod_counter) &&
2939
2940 The reason we don't check for this is that failed disks
2941 will have lower modification counts. If those disks are
2942 not added to the set they used to belong to, then they will
2943 form their own set, which may result in 2 different sets,
2944 for example, competing to be configured at raid0, and
2945 perhaps competing to be the root filesystem set. If the
2946 wrong ones get configured, or both attempt to become /,
2947 weird behaviour and or serious lossage will occur. Thus we
2948 need to bring them into the fold here, and kick them out at
2949 a later point.
2950
2951 */
2952
2953 clabel1 = cset->ac->clabel;
2954 clabel2 = ac->clabel;
2955 if ((clabel1->version == clabel2->version) &&
2956 (clabel1->serial_number == clabel2->serial_number) &&
2957 (clabel1->num_rows == clabel2->num_rows) &&
2958 (clabel1->num_columns == clabel2->num_columns) &&
2959 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2960 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2961 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2962 (clabel1->parityConfig == clabel2->parityConfig) &&
2963 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2964 (clabel1->blockSize == clabel2->blockSize) &&
2965 (clabel1->numBlocks == clabel2->numBlocks) &&
2966 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2967 (clabel1->root_partition == clabel2->root_partition) &&
2968 (clabel1->last_unit == clabel2->last_unit) &&
2969 (clabel1->config_order == clabel2->config_order)) {
2970 /* if it get's here, it almost *has* to be a match */
2971 } else {
2972 /* it's not consistent with somebody in the set..
2973 punt */
2974 return(0);
2975 }
2976 /* all was fine.. it must fit... */
2977 return(1);
2978 }
2979
2980 int
2981 rf_have_enough_components(RF_ConfigSet_t *cset)
2982 {
2983 RF_AutoConfig_t *ac;
2984 RF_AutoConfig_t *auto_config;
2985 RF_ComponentLabel_t *clabel;
2986 int c;
2987 int num_cols;
2988 int num_missing;
2989 int mod_counter;
2990 int mod_counter_found;
2991 int even_pair_failed;
2992 char parity_type;
2993
2994
2995 /* check to see that we have enough 'live' components
2996 of this set. If so, we can configure it if necessary */
2997
2998 num_cols = cset->ac->clabel->num_columns;
2999 parity_type = cset->ac->clabel->parityConfig;
3000
3001 /* XXX Check for duplicate components!?!?!? */
3002
3003 /* Determine what the mod_counter is supposed to be for this set. */
3004
3005 mod_counter_found = 0;
3006 mod_counter = 0;
3007 ac = cset->ac;
3008 while(ac!=NULL) {
3009 if (mod_counter_found==0) {
3010 mod_counter = ac->clabel->mod_counter;
3011 mod_counter_found = 1;
3012 } else {
3013 if (ac->clabel->mod_counter > mod_counter) {
3014 mod_counter = ac->clabel->mod_counter;
3015 }
3016 }
3017 ac = ac->next;
3018 }
3019
3020 num_missing = 0;
3021 auto_config = cset->ac;
3022
3023 even_pair_failed = 0;
3024 for(c=0; c<num_cols; c++) {
3025 ac = auto_config;
3026 while(ac!=NULL) {
3027 if ((ac->clabel->column == c) &&
3028 (ac->clabel->mod_counter == mod_counter)) {
3029 /* it's this one... */
3030 #if DEBUG
3031 printf("Found: %s at %d\n",
3032 ac->devname,c);
3033 #endif
3034 break;
3035 }
3036 ac=ac->next;
3037 }
3038 if (ac==NULL) {
3039 /* Didn't find one here! */
3040 /* special case for RAID 1, especially
3041 where there are more than 2
3042 components (where RAIDframe treats
3043 things a little differently :( ) */
3044 if (parity_type == '1') {
3045 if (c%2 == 0) { /* even component */
3046 even_pair_failed = 1;
3047 } else { /* odd component. If
3048 we're failed, and
3049 so is the even
3050 component, it's
3051 "Good Night, Charlie" */
3052 if (even_pair_failed == 1) {
3053 return(0);
3054 }
3055 }
3056 } else {
3057 /* normal accounting */
3058 num_missing++;
3059 }
3060 }
3061 if ((parity_type == '1') && (c%2 == 1)) {
3062 /* Just did an even component, and we didn't
3063 bail.. reset the even_pair_failed flag,
3064 and go on to the next component.... */
3065 even_pair_failed = 0;
3066 }
3067 }
3068
3069 clabel = cset->ac->clabel;
3070
3071 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3072 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3073 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3074 /* XXX this needs to be made *much* more general */
3075 /* Too many failures */
3076 return(0);
3077 }
3078 /* otherwise, all is well, and we've got enough to take a kick
3079 at autoconfiguring this set */
3080 return(1);
3081 }
3082
3083 void
3084 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3085 RF_Raid_t *raidPtr)
3086 {
3087 RF_ComponentLabel_t *clabel;
3088 int i;
3089
3090 clabel = ac->clabel;
3091
3092 /* 1. Fill in the common stuff */
3093 config->numRow = clabel->num_rows = 1;
3094 config->numCol = clabel->num_columns;
3095 config->numSpare = 0; /* XXX should this be set here? */
3096 config->sectPerSU = clabel->sectPerSU;
3097 config->SUsPerPU = clabel->SUsPerPU;
3098 config->SUsPerRU = clabel->SUsPerRU;
3099 config->parityConfig = clabel->parityConfig;
3100 /* XXX... */
3101 strcpy(config->diskQueueType,"fifo");
3102 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3103 config->layoutSpecificSize = 0; /* XXX ?? */
3104
3105 while(ac!=NULL) {
3106 /* row/col values will be in range due to the checks
3107 in reasonable_label() */
3108 strcpy(config->devnames[0][ac->clabel->column],
3109 ac->devname);
3110 ac = ac->next;
3111 }
3112
3113 for(i=0;i<RF_MAXDBGV;i++) {
3114 config->debugVars[i][0] = 0;
3115 }
3116 }
3117
3118 int
3119 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3120 {
3121 RF_ComponentLabel_t clabel;
3122 struct vnode *vp;
3123 dev_t dev;
3124 int column;
3125 int sparecol;
3126
3127 raidPtr->autoconfigure = new_value;
3128
3129 for(column=0; column<raidPtr->numCol; column++) {
3130 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3131 dev = raidPtr->Disks[column].dev;
3132 vp = raidPtr->raid_cinfo[column].ci_vp;
3133 raidread_component_label(dev, vp, &clabel);
3134 clabel.autoconfigure = new_value;
3135 raidwrite_component_label(dev, vp, &clabel);
3136 }
3137 }
3138 for(column = 0; column < raidPtr->numSpare ; column++) {
3139 sparecol = raidPtr->numCol + column;
3140 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3141 dev = raidPtr->Disks[sparecol].dev;
3142 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3143 raidread_component_label(dev, vp, &clabel);
3144 clabel.autoconfigure = new_value;
3145 raidwrite_component_label(dev, vp, &clabel);
3146 }
3147 }
3148 return(new_value);
3149 }
3150
3151 int
3152 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3153 {
3154 RF_ComponentLabel_t clabel;
3155 struct vnode *vp;
3156 dev_t dev;
3157 int column;
3158 int sparecol;
3159
3160 raidPtr->root_partition = new_value;
3161 for(column=0; column<raidPtr->numCol; column++) {
3162 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3163 dev = raidPtr->Disks[column].dev;
3164 vp = raidPtr->raid_cinfo[column].ci_vp;
3165 raidread_component_label(dev, vp, &clabel);
3166 clabel.root_partition = new_value;
3167 raidwrite_component_label(dev, vp, &clabel);
3168 }
3169 }
3170 for(column = 0; column < raidPtr->numSpare ; column++) {
3171 sparecol = raidPtr->numCol + column;
3172 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3173 dev = raidPtr->Disks[sparecol].dev;
3174 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3175 raidread_component_label(dev, vp, &clabel);
3176 clabel.root_partition = new_value;
3177 raidwrite_component_label(dev, vp, &clabel);
3178 }
3179 }
3180 return(new_value);
3181 }
3182
3183 void
3184 rf_release_all_vps(RF_ConfigSet_t *cset)
3185 {
3186 RF_AutoConfig_t *ac;
3187
3188 ac = cset->ac;
3189 while(ac!=NULL) {
3190 /* Close the vp, and give it back */
3191 if (ac->vp) {
3192 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3193 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3194 vput(ac->vp);
3195 ac->vp = NULL;
3196 }
3197 ac = ac->next;
3198 }
3199 }
3200
3201
3202 void
3203 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3204 {
3205 RF_AutoConfig_t *ac;
3206 RF_AutoConfig_t *next_ac;
3207
3208 ac = cset->ac;
3209 while(ac!=NULL) {
3210 next_ac = ac->next;
3211 /* nuke the label */
3212 free(ac->clabel, M_RAIDFRAME);
3213 /* cleanup the config structure */
3214 free(ac, M_RAIDFRAME);
3215 /* "next.." */
3216 ac = next_ac;
3217 }
3218 /* and, finally, nuke the config set */
3219 free(cset, M_RAIDFRAME);
3220 }
3221
3222
3223 void
3224 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3225 {
3226 /* current version number */
3227 clabel->version = RF_COMPONENT_LABEL_VERSION;
3228 clabel->serial_number = raidPtr->serial_number;
3229 clabel->mod_counter = raidPtr->mod_counter;
3230 clabel->num_rows = 1;
3231 clabel->num_columns = raidPtr->numCol;
3232 clabel->clean = RF_RAID_DIRTY; /* not clean */
3233 clabel->status = rf_ds_optimal; /* "It's good!" */
3234
3235 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3236 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3237 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3238
3239 clabel->blockSize = raidPtr->bytesPerSector;
3240 clabel->numBlocks = raidPtr->sectorsPerDisk;
3241
3242 /* XXX not portable */
3243 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3244 clabel->maxOutstanding = raidPtr->maxOutstanding;
3245 clabel->autoconfigure = raidPtr->autoconfigure;
3246 clabel->root_partition = raidPtr->root_partition;
3247 clabel->last_unit = raidPtr->raidid;
3248 clabel->config_order = raidPtr->config_order;
3249 }
3250
3251 int
3252 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3253 {
3254 RF_Raid_t *raidPtr;
3255 RF_Config_t *config;
3256 int raidID;
3257 int retcode;
3258
3259 #if DEBUG
3260 printf("RAID autoconfigure\n");
3261 #endif
3262
3263 retcode = 0;
3264 *unit = -1;
3265
3266 /* 1. Create a config structure */
3267
3268 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3269 M_RAIDFRAME,
3270 M_NOWAIT);
3271 if (config==NULL) {
3272 printf("Out of mem!?!?\n");
3273 /* XXX do something more intelligent here. */
3274 return(1);
3275 }
3276
3277 memset(config, 0, sizeof(RF_Config_t));
3278
3279 /*
3280 2. Figure out what RAID ID this one is supposed to live at
3281 See if we can get the same RAID dev that it was configured
3282 on last time..
3283 */
3284
3285 raidID = cset->ac->clabel->last_unit;
3286 if ((raidID < 0) || (raidID >= numraid)) {
3287 /* let's not wander off into lala land. */
3288 raidID = numraid - 1;
3289 }
3290 if (raidPtrs[raidID]->valid != 0) {
3291
3292 /*
3293 Nope... Go looking for an alternative...
3294 Start high so we don't immediately use raid0 if that's
3295 not taken.
3296 */
3297
3298 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3299 if (raidPtrs[raidID]->valid == 0) {
3300 /* can use this one! */
3301 break;
3302 }
3303 }
3304 }
3305
3306 if (raidID < 0) {
3307 /* punt... */
3308 printf("Unable to auto configure this set!\n");
3309 printf("(Out of RAID devs!)\n");
3310 free(config, M_RAIDFRAME);
3311 return(1);
3312 }
3313
3314 #if DEBUG
3315 printf("Configuring raid%d:\n",raidID);
3316 #endif
3317
3318 raidPtr = raidPtrs[raidID];
3319
3320 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3321 raidPtr->raidid = raidID;
3322 raidPtr->openings = RAIDOUTSTANDING;
3323
3324 /* 3. Build the configuration structure */
3325 rf_create_configuration(cset->ac, config, raidPtr);
3326
3327 /* 4. Do the configuration */
3328 retcode = rf_Configure(raidPtr, config, cset->ac);
3329
3330 if (retcode == 0) {
3331
3332 raidinit(raidPtrs[raidID]);
3333
3334 rf_markalldirty(raidPtrs[raidID]);
3335 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3336 if (cset->ac->clabel->root_partition==1) {
3337 /* everything configured just fine. Make a note
3338 that this set is eligible to be root. */
3339 cset->rootable = 1;
3340 /* XXX do this here? */
3341 raidPtrs[raidID]->root_partition = 1;
3342 }
3343 }
3344
3345 /* 5. Cleanup */
3346 free(config, M_RAIDFRAME);
3347
3348 *unit = raidID;
3349 return(retcode);
3350 }
3351
3352 void
3353 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3354 {
3355 struct buf *bp;
3356
3357 bp = (struct buf *)desc->bp;
3358 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3359 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3360 }
3361
/*
 * rf_pool_init -- initialise a pool and apply RAIDframe's limits to it.
 *
 * p:      pool to initialise.
 * size:   item size handed to pool_init().
 * w_chan: wait-channel string handed to pool_init().
 * xmin:   passed to both pool_prime() and pool_setlowat().
 * xmax:   passed to pool_sethiwat().
 *
 * The remaining pool_init() arguments are fixed at 0 / NULL.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL);

	/* high-water mark first, then prime, then the low-water mark */
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3371
3372 /*
3373 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3374 * if there is IO pending and if that IO could possibly be done for a
3375 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3376 * otherwise.
3377 *
3378 */
3379
3380 int
3381 rf_buf_queue_check(int raidid)
3382 {
3383 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3384 raidPtrs[raidid]->openings > 0) {
3385 /* there is work to do */
3386 return 0;
3387 }
3388 /* default is nothing to do */
3389 return 1;
3390 }
3391
3392 int
3393 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3394 {
3395 struct partinfo dpart;
3396 struct dkwedge_info dkw;
3397 int error;
3398
3399 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3400 if (error == 0) {
3401 diskPtr->blockSize = dpart.disklab->d_secsize;
3402 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3403 diskPtr->partitionSize = dpart.part->p_size;
3404 return 0;
3405 }
3406
3407 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3408 if (error == 0) {
3409 diskPtr->blockSize = 512; /* XXX */
3410 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3411 diskPtr->partitionSize = dkw.dkw_size;
3412 return 0;
3413 }
3414 return error;
3415 }
3416
/*
 * raid_match -- always reports a match.
 *
 * None of the arguments are examined; the return value is the constant 1.
 * NOTE(review): presumably the autoconfiguration match hook for the raid
 * pseudo-device -- confirm against the attachment declaration.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{
	/* arguments are intentionally unused */
	(void)self;
	(void)cfdata;
	(void)aux;

	return 1;
}
3422
/*
 * raid_attach -- attach routine; intentionally does nothing.
 *
 * None of the arguments are used.
 * NOTE(review): presumably the real per-unit setup happens elsewhere in
 * this file -- confirm before adding work here.
 */
static void
raid_attach(struct device *parent, struct device *self, void *aux)
{
	/* nothing to do; the arguments are intentionally unused */
	(void)parent;
	(void)self;
	(void)aux;
}
3429
3430
3431 static int
3432 raid_detach(struct device *self, int flags)
3433 {
3434 struct raid_softc *rs = (struct raid_softc *)self;
3435
3436 if (rs->sc_flags & RAIDF_INITED)
3437 return EBUSY;
3438
3439 return 0;
3440 }
3441
3442
3443