1 /* $NetBSD: rf_netbsdkintf.c,v 1.226 2007/03/09 15:57:34 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.226 2007/03/09 15:57:34 oster Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
191 #ifdef DEBUG
192 int rf_kdebug_level = 0;
193 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
194 #else /* DEBUG */
195 #define db1_printf(a) { }
196 #endif /* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217 static int raid_match(struct device *, struct cfdata *, void *);
218 static void raid_attach(struct device *, struct device *, void *);
219 static int raid_detach(struct device *, int);
220
221 dev_type_open(raidopen);
222 dev_type_close(raidclose);
223 dev_type_read(raidread);
224 dev_type_write(raidwrite);
225 dev_type_ioctl(raidioctl);
226 dev_type_strategy(raidstrategy);
227 dev_type_dump(raiddump);
228 dev_type_size(raidsize);
229
230 const struct bdevsw raid_bdevsw = {
231 raidopen, raidclose, raidstrategy, raidioctl,
232 raiddump, raidsize, D_DISK
233 };
234
235 const struct cdevsw raid_cdevsw = {
236 raidopen, raidclose, raidread, raidwrite, raidioctl,
237 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
238 };
239
240 /* XXX Not sure if the following should be replacing the raidPtrs above,
241 or if it should be used in conjunction with that...
242 */
243
244 struct raid_softc {
245 struct device *sc_dev;
246 int sc_flags; /* flags */
247 int sc_cflags; /* configuration flags */
248 uint64_t sc_size; /* size of the raid device */
249 char sc_xname[20]; /* XXX external name */
250 struct disk sc_dkdev; /* generic disk device info */
251 struct bufq_state *buf_queue; /* used for the device queue */
252 };
253 /* sc_flags */
254 #define RAIDF_INITED 0x01 /* unit has been initialized */
255 #define RAIDF_WLABEL 0x02 /* label area is writable */
256 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
257 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
258 #define RAIDF_LOCKED 0x80 /* unit is locked */
259
260 #define raidunit(x) DISKUNIT(x)
261 int numraid = 0;
262
263 extern struct cfdriver raid_cd;
264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
265 raid_match, raid_attach, raid_detach, NULL);
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
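/*
 * A rough worked example (illustrative arithmetic only, using the figures
 * above): with the default RAIDOUTSTANDING of 6 on that 5-disk, 32k-stripe
 * set, small writes could tie up on the order of 6 * 192K = 1152K of kernel
 * memory for old data and parity buffers, on top of the incoming data.
 */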
287
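/* dev_t of the raw partition on the same raid unit; used below when the
   on-disk disklabel is written (see the DIOCWDINFO handling) */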
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
295 struct disklabel *);
296 static void raidgetdisklabel(dev_t);
297 static void raidmakedisklabel(struct raid_softc *);
298
299 static int raidlock(struct raid_softc *);
300 static void raidunlock(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(struct device *self);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 static int rf_reasonable_label(RF_ComponentLabel_t *);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
322
323 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
324 allow autoconfig to take place.
325 Note that this is overridden by having
326 RAID_AUTOCONFIG as an option in the
327 kernel config file. */
328
329 struct RF_Pools_s rf_pools;
330
331 void
332 raidattach(int num)
333 {
334 int raidID;
335 int i, rc;
336
337 #ifdef DEBUG
338 printf("raidattach: Asked for %d units\n", num);
339 #endif
340
341 if (num <= 0) {
342 #ifdef DIAGNOSTIC
343 panic("raidattach: count <= 0");
344 #endif
345 return;
346 }
347 /* This is where all the initialization stuff gets done. */
348
349 numraid = num;
350
351 /* Make some space for requested number of units... */
352
353 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
354 if (raidPtrs == NULL) {
355 panic("raidPtrs is NULL!!");
356 }
357
358 rf_mutex_init(&rf_sparet_wait_mutex);
359
360 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
361
362 for (i = 0; i < num; i++)
363 raidPtrs[i] = NULL;
364 rc = rf_BootRaidframe();
365 if (rc == 0)
366 printf("Kernelized RAIDframe activated\n");
367 else
368 panic("Serious error booting RAID!!");
369
370 /* put together some data structures like the CCD device does.. This
371 * lets us lock the device and what-not when it gets opened. */
372
373 raid_softc = (struct raid_softc *)
374 malloc(num * sizeof(struct raid_softc),
375 M_RAIDFRAME, M_NOWAIT);
376 if (raid_softc == NULL) {
377 printf("WARNING: no memory for RAIDframe driver\n");
378 return;
379 }
380
381 memset(raid_softc, 0, num * sizeof(struct raid_softc));
382
383 for (raidID = 0; raidID < num; raidID++) {
384 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
385
386 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
387 (RF_Raid_t *));
388 if (raidPtrs[raidID] == NULL) {
389 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
390 numraid = raidID;
391 return;
392 }
393 }
394
395 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
396 printf("config_cfattach_attach failed?\n");
397 }
398
399 #ifdef RAID_AUTOCONFIG
400 raidautoconfig = 1;
401 #endif
402
403 /*
404 * Register a finalizer which will be used to auto-config RAID
405 * sets once all real hardware devices have been found.
406 */
407 if (config_finalize_register(NULL, rf_autoconfig) != 0)
408 printf("WARNING: unable to register RAIDframe finalizer\n");
409 }
410
411 int
412 rf_autoconfig(struct device *self)
413 {
414 RF_AutoConfig_t *ac_list;
415 RF_ConfigSet_t *config_sets;
416 int i;
417
418 if (raidautoconfig == 0)
419 return (0);
420
421 /* XXX This code can only be run once. */
422 raidautoconfig = 0;
423
424 /* 1. locate all RAID components on the system */
425 #ifdef DEBUG
426 printf("Searching for RAID components...\n");
427 #endif
428 ac_list = rf_find_raid_components();
429
430 /* 2. Sort them into their respective sets. */
431 config_sets = rf_create_auto_sets(ac_list);
432
433 /*
434 * 3. Evaluate each set and configure the valid ones.
435 * This gets done in rf_buildroothack().
436 */
437 rf_buildroothack(config_sets);
438
439 for (i = 0; i < numraid; i++)
440 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
441 dkwedge_discover(&raid_softc[i].sc_dkdev);
442
443 return 1;
444 }
445
446 void
447 rf_buildroothack(RF_ConfigSet_t *config_sets)
448 {
449 RF_ConfigSet_t *cset;
450 RF_ConfigSet_t *next_cset;
451 int retcode;
452 int raidID;
453 int rootID;
454 int col;
455 int num_root;
456 char *devname;
457
458 rootID = 0;
459 num_root = 0;
460 cset = config_sets;
461 while(cset != NULL ) {
462 next_cset = cset->next;
463 if (rf_have_enough_components(cset) &&
464 cset->ac->clabel->autoconfigure==1) {
465 retcode = rf_auto_config_set(cset,&raidID);
466 if (!retcode) {
467 #ifdef DEBUG
468 printf("raid%d: configured ok\n", raidID);
469 #endif
470 if (cset->rootable) {
471 rootID = raidID;
472 num_root++;
473 }
474 } else {
475 /* The autoconfig didn't work :( */
476 #ifdef DEBUG
477 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
478 #endif
479 rf_release_all_vps(cset);
480 }
481 } else {
482 #ifdef DEBUG
483 printf("raid%d: not enough components\n", raidID);
484 #endif
485 /* we're not autoconfiguring this set...
486 release the associated resources */
487 rf_release_all_vps(cset);
488 }
489 /* cleanup */
490 rf_cleanup_config_set(cset);
491 cset = next_cset;
492 }
493
494 /* if the user has specified what the root device should be
495 then we don't touch booted_device or boothowto... */
496
497 if (rootspec != NULL)
498 return;
499
500 /* we found something bootable... */
501
502 if (num_root == 1) {
503 booted_device = raid_softc[rootID].sc_dev;
504 } else if (num_root > 1) {
505
506 /*
507 * Maybe the MD code can help. If it cannot, then
508 * setroot() will discover that we have no
509 * booted_device and will ask the user if nothing was
510 * hardwired in the kernel config file
511 */
512
513 if (booted_device == NULL)
514 cpu_rootconf();
515 if (booted_device == NULL)
516 return;
517
518 num_root = 0;
519 for (raidID = 0; raidID < numraid; raidID++) {
520 if (raidPtrs[raidID]->valid == 0)
521 continue;
522
523 if (raidPtrs[raidID]->root_partition == 0)
524 continue;
525
526 for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
527 devname = raidPtrs[raidID]->Disks[col].devname;
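/* strip the leading "/dev/" so the component name can be
   compared against the autoconf name (dv_xname) below */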
528 devname += sizeof("/dev/") - 1;
529 if (strncmp(devname, booted_device->dv_xname,
530 strlen(booted_device->dv_xname)) != 0)
531 continue;
532 #ifdef DEBUG
533 printf("raid%d includes boot device %s\n",
534 raidID, devname);
535 #endif
536 num_root++;
537 rootID = raidID;
538 }
539 }
540
541 if (num_root == 1) {
542 booted_device = raid_softc[rootID].sc_dev;
543 } else {
544 /* we can't guess.. require the user to answer... */
545 boothowto |= RB_ASKNAME;
546 }
547 }
548 }
549
550
551 int
552 raidsize(dev_t dev)
553 {
554 struct raid_softc *rs;
555 struct disklabel *lp;
556 int part, unit, omask, size;
557
558 unit = raidunit(dev);
559 if (unit >= numraid)
560 return (-1);
561 rs = &raid_softc[unit];
562
563 if ((rs->sc_flags & RAIDF_INITED) == 0)
564 return (-1);
565
566 part = DISKPART(dev);
567 omask = rs->sc_dkdev.dk_openmask & (1 << part);
568 lp = rs->sc_dkdev.dk_label;
569
570 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
571 return (-1);
572
573 if (lp->d_partitions[part].p_fstype != FS_SWAP)
574 size = -1;
575 else
576 size = lp->d_partitions[part].p_size *
577 (lp->d_secsize / DEV_BSIZE);
578
579 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
580 return (-1);
581
582 return (size);
583
584 }
585
586 int
587 raiddump(dev_t dev, daddr_t blkno, void *va,
588 size_t size)
589 {
590 /* Not implemented. */
591 return ENXIO;
592 }
593 /* ARGSUSED */
594 int
595 raidopen(dev_t dev, int flags, int fmt,
596 struct lwp *l)
597 {
598 int unit = raidunit(dev);
599 struct raid_softc *rs;
600 struct disklabel *lp;
601 int part, pmask;
602 int error = 0;
603
604 if (unit >= numraid)
605 return (ENXIO);
606 rs = &raid_softc[unit];
607
608 if ((error = raidlock(rs)) != 0)
609 return (error);
610 lp = rs->sc_dkdev.dk_label;
611
612 part = DISKPART(dev);
613
614 /*
615 * If there are wedges, and this is not RAW_PART, then we
616 * need to fail.
617 */
618 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
619 error = EBUSY;
620 goto bad;
621 }
622 pmask = (1 << part);
623
624 if ((rs->sc_flags & RAIDF_INITED) &&
625 (rs->sc_dkdev.dk_openmask == 0))
626 raidgetdisklabel(dev);
627
628 /* make sure that this partition exists */
629
630 if (part != RAW_PART) {
631 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
632 ((part >= lp->d_npartitions) ||
633 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
634 error = ENXIO;
635 goto bad;
636 }
637 }
638 /* Prevent this unit from being unconfigured while open. */
639 switch (fmt) {
640 case S_IFCHR:
641 rs->sc_dkdev.dk_copenmask |= pmask;
642 break;
643
644 case S_IFBLK:
645 rs->sc_dkdev.dk_bopenmask |= pmask;
646 break;
647 }
648
649 if ((rs->sc_dkdev.dk_openmask == 0) &&
650 ((rs->sc_flags & RAIDF_INITED) != 0)) {
651 /* First one... mark things as dirty... Note that we *MUST*
652 have done a configure before this. I DO NOT WANT TO BE
653 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
654 THAT THEY BELONG TOGETHER!!!!! */
655 /* XXX should check to see if we're only open for reading
656 here... If so, we needn't do this, but then need some
657 other way of keeping track of what's happened.. */
658
659 rf_markalldirty( raidPtrs[unit] );
660 }
661
662
663 rs->sc_dkdev.dk_openmask =
664 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
665
666 bad:
667 raidunlock(rs);
668
669 return (error);
670
671
672 }
673 /* ARGSUSED */
674 int
675 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
676 {
677 int unit = raidunit(dev);
678 struct cfdata *cf;
679 struct raid_softc *rs;
680 int error = 0;
681 int part;
682
683 if (unit >= numraid)
684 return (ENXIO);
685 rs = &raid_softc[unit];
686
687 if ((error = raidlock(rs)) != 0)
688 return (error);
689
690 part = DISKPART(dev);
691
692 /* ...that much closer to allowing unconfiguration... */
693 switch (fmt) {
694 case S_IFCHR:
695 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
696 break;
697
698 case S_IFBLK:
699 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
700 break;
701 }
702 rs->sc_dkdev.dk_openmask =
703 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
704
705 if ((rs->sc_dkdev.dk_openmask == 0) &&
706 ((rs->sc_flags & RAIDF_INITED) != 0)) {
707 /* Last one... device is not unconfigured yet.
708 (Device shutdown has taken care of setting the
709 clean bits if RAIDF_INITED is not set.)
710 Mark things as clean... */
711
712 rf_update_component_labels(raidPtrs[unit],
713 RF_FINAL_COMPONENT_UPDATE);
714 if (doing_shutdown) {
715 /* last one, and we're going down, so
716 lights out for this RAID set too. */
717 error = rf_Shutdown(raidPtrs[unit]);
718
719 /* It's no longer initialized... */
720 rs->sc_flags &= ~RAIDF_INITED;
721
722 /* detach the device */
723
724 cf = device_cfdata(rs->sc_dev);
725 error = config_detach(rs->sc_dev, DETACH_QUIET);
726 free(cf, M_RAIDFRAME);
727
728 /* Detach the disk. */
729 pseudo_disk_detach(&rs->sc_dkdev);
730 }
731 }
732
733 raidunlock(rs);
734 return (0);
735
736 }
737
738 void
739 raidstrategy(struct buf *bp)
740 {
741 int s;
742
743 unsigned int raidID = raidunit(bp->b_dev);
744 RF_Raid_t *raidPtr;
745 struct raid_softc *rs = &raid_softc[raidID];
746 int wlabel;
747
748 if ((rs->sc_flags & RAIDF_INITED) ==0) {
749 bp->b_error = ENXIO;
750 bp->b_flags |= B_ERROR;
751 goto done;
752 }
753 if (raidID >= numraid || !raidPtrs[raidID]) {
754 bp->b_error = ENODEV;
755 bp->b_flags |= B_ERROR;
756 goto done;
757 }
758 raidPtr = raidPtrs[raidID];
759 if (!raidPtr->valid) {
760 bp->b_error = ENODEV;
761 bp->b_flags |= B_ERROR;
762 goto done;
763 }
764 if (bp->b_bcount == 0) {
765 db1_printf(("b_bcount is zero..\n"));
766 goto done;
767 }
768
769 /*
770 * Do bounds checking and adjust transfer. If there's an
771 * error, the bounds check will flag that for us.
772 */
773
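/* nonzero when writing over the label area is permitted, i.e. the label
   is being rewritten (RAIDF_LABELLING) or label writes were enabled via
   DIOCWLABEL (RAIDF_WLABEL) */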
774 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
775 if (DISKPART(bp->b_dev) == RAW_PART) {
776 uint64_t size; /* device size in DEV_BSIZE unit */
777
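/* Example (assuming the usual 512-byte DEV_BSIZE): with 512-byte
   sectors logBytesPerSector == DEV_BSHIFT and size is simply
   totalSectors; with 4096-byte sectors the count is shifted left
   by 3 to express it in DEV_BSIZE units. */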
778 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
779 size = raidPtr->totalSectors <<
780 (raidPtr->logBytesPerSector - DEV_BSHIFT);
781 } else {
782 size = raidPtr->totalSectors >>
783 (DEV_BSHIFT - raidPtr->logBytesPerSector);
784 }
785 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
786 goto done;
787 }
788 } else {
789 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
790 db1_printf(("Bounds check failed!!:%d %d\n",
791 (int) bp->b_blkno, (int) wlabel));
792 goto done;
793 }
794 }
795 s = splbio();
796
797 bp->b_resid = 0;
798
799 /* stuff it onto our queue */
800 BUFQ_PUT(rs->buf_queue, bp);
801
802 /* schedule the IO to happen at the next convenient time */
803 wakeup(&(raidPtrs[raidID]->iodone));
804
805 splx(s);
806 return;
807
808 done:
809 bp->b_resid = bp->b_bcount;
810 biodone(bp);
811 }
812 /* ARGSUSED */
813 int
814 raidread(dev_t dev, struct uio *uio, int flags)
815 {
816 int unit = raidunit(dev);
817 struct raid_softc *rs;
818
819 if (unit >= numraid)
820 return (ENXIO);
821 rs = &raid_softc[unit];
822
823 if ((rs->sc_flags & RAIDF_INITED) == 0)
824 return (ENXIO);
825
826 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
827
828 }
829 /* ARGSUSED */
830 int
831 raidwrite(dev_t dev, struct uio *uio, int flags)
832 {
833 int unit = raidunit(dev);
834 struct raid_softc *rs;
835
836 if (unit >= numraid)
837 return (ENXIO);
838 rs = &raid_softc[unit];
839
840 if ((rs->sc_flags & RAIDF_INITED) == 0)
841 return (ENXIO);
842
843 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
844
845 }
846
847 int
848 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
849 {
850 int unit = raidunit(dev);
851 int error = 0;
852 int part, pmask;
853 struct cfdata *cf;
854 struct raid_softc *rs;
855 RF_Config_t *k_cfg, *u_cfg;
856 RF_Raid_t *raidPtr;
857 RF_RaidDisk_t *diskPtr;
858 RF_AccTotals_t *totals;
859 RF_DeviceConfig_t *d_cfg, **ucfgp;
860 u_char *specific_buf;
861 int retcode = 0;
862 int column;
863 int raidid;
864 struct rf_recon_req *rrcopy, *rr;
865 RF_ComponentLabel_t *clabel;
866 RF_ComponentLabel_t *ci_label;
867 RF_ComponentLabel_t **clabel_ptr;
868 RF_SingleComponent_t *sparePtr,*componentPtr;
869 RF_SingleComponent_t component;
870 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
871 int i, j, d;
872 #ifdef __HAVE_OLD_DISKLABEL
873 struct disklabel newlabel;
874 #endif
875 struct dkwedge_info *dkw;
876
877 if (unit >= numraid)
878 return (ENXIO);
879 rs = &raid_softc[unit];
880 raidPtr = raidPtrs[unit];
881
882 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
883 (int) DISKPART(dev), (int) unit, (int) cmd));
884
885 /* Must be open for writes for these commands... */
886 switch (cmd) {
887 #ifdef DIOCGSECTORSIZE
888 case DIOCGSECTORSIZE:
889 *(u_int *)data = raidPtr->bytesPerSector;
890 return 0;
891 case DIOCGMEDIASIZE:
892 *(off_t *)data =
893 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
894 return 0;
895 #endif
896 case DIOCSDINFO:
897 case DIOCWDINFO:
898 #ifdef __HAVE_OLD_DISKLABEL
899 case ODIOCWDINFO:
900 case ODIOCSDINFO:
901 #endif
902 case DIOCWLABEL:
903 case DIOCAWEDGE:
904 case DIOCDWEDGE:
905 if ((flag & FWRITE) == 0)
906 return (EBADF);
907 }
908
909 /* Must be initialized for these... */
910 switch (cmd) {
911 case DIOCGDINFO:
912 case DIOCSDINFO:
913 case DIOCWDINFO:
914 #ifdef __HAVE_OLD_DISKLABEL
915 case ODIOCGDINFO:
916 case ODIOCWDINFO:
917 case ODIOCSDINFO:
918 case ODIOCGDEFLABEL:
919 #endif
920 case DIOCGPART:
921 case DIOCWLABEL:
922 case DIOCGDEFLABEL:
923 case DIOCAWEDGE:
924 case DIOCDWEDGE:
925 case DIOCLWEDGES:
926 case RAIDFRAME_SHUTDOWN:
927 case RAIDFRAME_REWRITEPARITY:
928 case RAIDFRAME_GET_INFO:
929 case RAIDFRAME_RESET_ACCTOTALS:
930 case RAIDFRAME_GET_ACCTOTALS:
931 case RAIDFRAME_KEEP_ACCTOTALS:
932 case RAIDFRAME_GET_SIZE:
933 case RAIDFRAME_FAIL_DISK:
934 case RAIDFRAME_COPYBACK:
935 case RAIDFRAME_CHECK_RECON_STATUS:
936 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
937 case RAIDFRAME_GET_COMPONENT_LABEL:
938 case RAIDFRAME_SET_COMPONENT_LABEL:
939 case RAIDFRAME_ADD_HOT_SPARE:
940 case RAIDFRAME_REMOVE_HOT_SPARE:
941 case RAIDFRAME_INIT_LABELS:
942 case RAIDFRAME_REBUILD_IN_PLACE:
943 case RAIDFRAME_CHECK_PARITY:
944 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
945 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
946 case RAIDFRAME_CHECK_COPYBACK_STATUS:
947 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
948 case RAIDFRAME_SET_AUTOCONFIG:
949 case RAIDFRAME_SET_ROOT:
950 case RAIDFRAME_DELETE_COMPONENT:
951 case RAIDFRAME_INCORPORATE_HOT_SPARE:
952 if ((rs->sc_flags & RAIDF_INITED) == 0)
953 return (ENXIO);
954 }
955
956 switch (cmd) {
957
958 /* configure the system */
959 case RAIDFRAME_CONFIGURE:
960
961 if (raidPtr->valid) {
962 /* There is a valid RAID set running on this unit! */
963 printf("raid%d: Device already configured!\n",unit);
964 return(EINVAL);
965 }
966
967 /* copy-in the configuration information */
968 /* data points to a pointer to the configuration structure */
969
970 u_cfg = *((RF_Config_t **) data);
971 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
972 if (k_cfg == NULL) {
973 return (ENOMEM);
974 }
975 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
976 if (retcode) {
977 RF_Free(k_cfg, sizeof(RF_Config_t));
978 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
979 retcode));
980 return (retcode);
981 }
982 /* allocate a buffer for the layout-specific data, and copy it
983 * in */
984 if (k_cfg->layoutSpecificSize) {
985 if (k_cfg->layoutSpecificSize > 10000) {
986 /* sanity check */
987 RF_Free(k_cfg, sizeof(RF_Config_t));
988 return (EINVAL);
989 }
990 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
991 (u_char *));
992 if (specific_buf == NULL) {
993 RF_Free(k_cfg, sizeof(RF_Config_t));
994 return (ENOMEM);
995 }
996 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
997 k_cfg->layoutSpecificSize);
998 if (retcode) {
999 RF_Free(k_cfg, sizeof(RF_Config_t));
1000 RF_Free(specific_buf,
1001 k_cfg->layoutSpecificSize);
1002 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1003 retcode));
1004 return (retcode);
1005 }
1006 } else
1007 specific_buf = NULL;
1008 k_cfg->layoutSpecific = specific_buf;
1009
1010 /* should do some kind of sanity check on the configuration.
1011 * Store the sum of all the bytes in the last byte? */
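#if 0
		/*
		 * Hypothetical sketch of the byte-sum check suggested above
		 * (not part of the driver): it assumes userland would store
		 * the sum of all preceding bytes in the last byte of the
		 * RF_Config_t it passes in.  A real check would also have to
		 * run before k_cfg->layoutSpecific is overwritten above.
		 */
		{
			u_char *cfgp = (u_char *) k_cfg;
			u_char cfg_sum = 0;
			size_t n;

			for (n = 0; n < sizeof(RF_Config_t) - 1; n++)
				cfg_sum += cfgp[n];
			if (cfg_sum != cfgp[sizeof(RF_Config_t) - 1])
				printf("raid%d: config checksum mismatch\n",
				    unit);
		}
#endif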
1012
1013 /* configure the system */
1014
1015 /*
1016 * Clear the entire RAID descriptor, just to make sure
1017 * there is no stale data left in the case of a
1018 * reconfiguration
1019 */
1020 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1021 raidPtr->raidid = unit;
1022
1023 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1024
1025 if (retcode == 0) {
1026
1027 /* allow this many simultaneous IO's to
1028 this RAID device */
1029 raidPtr->openings = RAIDOUTSTANDING;
1030
1031 raidinit(raidPtr);
1032 rf_markalldirty(raidPtr);
1033 }
1034 /* free the buffers. No return code here. */
1035 if (k_cfg->layoutSpecificSize) {
1036 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1037 }
1038 RF_Free(k_cfg, sizeof(RF_Config_t));
1039
1040 return (retcode);
1041
1042 /* shutdown the system */
1043 case RAIDFRAME_SHUTDOWN:
1044
1045 if ((error = raidlock(rs)) != 0)
1046 return (error);
1047
1048 /*
1049 * If somebody has a partition mounted, we shouldn't
1050 * shutdown.
1051 */
1052
1053 part = DISKPART(dev);
1054 pmask = (1 << part);
1055 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1056 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1057 (rs->sc_dkdev.dk_copenmask & pmask))) {
1058 raidunlock(rs);
1059 return (EBUSY);
1060 }
1061
1062 retcode = rf_Shutdown(raidPtr);
1063
1064 /* It's no longer initialized... */
1065 rs->sc_flags &= ~RAIDF_INITED;
1066
1067 /* free the pseudo device attach bits */
1068
1069 cf = device_cfdata(rs->sc_dev);
1070 /* XXX this causes us to not return any errors
1071 from the above call to rf_Shutdown() */
1072 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1073 free(cf, M_RAIDFRAME);
1074
1075 /* Detach the disk. */
1076 pseudo_disk_detach(&rs->sc_dkdev);
1077
1078 raidunlock(rs);
1079
1080 return (retcode);
1081 case RAIDFRAME_GET_COMPONENT_LABEL:
1082 clabel_ptr = (RF_ComponentLabel_t **) data;
1083 /* need to read the component label for the disk indicated
1084 by row,column in clabel */
1085
1086 /* For practice, let's get it directly from disk, rather
1087 than from the in-core copy */
1088 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1089 (RF_ComponentLabel_t *));
1090 if (clabel == NULL)
1091 return (ENOMEM);
1092
1093 retcode = copyin( *clabel_ptr, clabel,
1094 sizeof(RF_ComponentLabel_t));
1095
1096 if (retcode) {
1097 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1098 return(retcode);
1099 }
1100
1101 clabel->row = 0; /* Don't allow looking at anything else.*/
1102
1103 column = clabel->column;
1104
1105 if ((column < 0) || (column >= raidPtr->numCol +
1106 raidPtr->numSpare)) {
1107 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1108 return(EINVAL);
1109 }
1110
1111 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1112 raidPtr->raid_cinfo[column].ci_vp,
1113 clabel );
1114
1115 if (retcode == 0) {
1116 retcode = copyout(clabel, *clabel_ptr,
1117 sizeof(RF_ComponentLabel_t));
1118 }
1119 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1120 return (retcode);
1121
1122 case RAIDFRAME_SET_COMPONENT_LABEL:
1123 clabel = (RF_ComponentLabel_t *) data;
1124
1125 /* XXX check the label for valid stuff... */
1126 /* Note that some things *should not* get modified --
1127 the user should be re-initing the labels instead of
1128 trying to patch things.
1129 */
1130
1131 raidid = raidPtr->raidid;
1132 #ifdef DEBUG
1133 printf("raid%d: Got component label:\n", raidid);
1134 printf("raid%d: Version: %d\n", raidid, clabel->version);
1135 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1136 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1137 printf("raid%d: Column: %d\n", raidid, clabel->column);
1138 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1139 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1140 printf("raid%d: Status: %d\n", raidid, clabel->status);
1141 #endif
1142 clabel->row = 0;
1143 column = clabel->column;
1144
1145 if ((column < 0) || (column >= raidPtr->numCol)) {
1146 return(EINVAL);
1147 }
1148
1149 /* XXX this isn't allowed to do anything for now :-) */
1150
1151 /* XXX and before it is, we need to fill in the rest
1152 of the fields!?!?!?! */
1153 #if 0
1154 raidwrite_component_label(
1155 raidPtr->Disks[column].dev,
1156 raidPtr->raid_cinfo[column].ci_vp,
1157 clabel );
1158 #endif
1159 return (0);
1160
1161 case RAIDFRAME_INIT_LABELS:
1162 clabel = (RF_ComponentLabel_t *) data;
1163 /*
1164 we only want the serial number from
1165 the above. We get all the rest of the information
1166 from the config that was used to create this RAID
1167 set.
1168 */
1169
1170 raidPtr->serial_number = clabel->serial_number;
1171
1172 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1173 (RF_ComponentLabel_t *));
1174 if (ci_label == NULL)
1175 return (ENOMEM);
1176
1177 raid_init_component_label(raidPtr, ci_label);
1178 ci_label->serial_number = clabel->serial_number;
1179 ci_label->row = 0; /* we don't pretend to support more */
1180
1181 for(column=0;column<raidPtr->numCol;column++) {
1182 diskPtr = &raidPtr->Disks[column];
1183 if (!RF_DEAD_DISK(diskPtr->status)) {
1184 ci_label->partitionSize = diskPtr->partitionSize;
1185 ci_label->column = column;
1186 raidwrite_component_label(
1187 raidPtr->Disks[column].dev,
1188 raidPtr->raid_cinfo[column].ci_vp,
1189 ci_label );
1190 }
1191 }
1192 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1193
1194 return (retcode);
1195 case RAIDFRAME_SET_AUTOCONFIG:
1196 d = rf_set_autoconfig(raidPtr, *(int *) data);
1197 printf("raid%d: New autoconfig value is: %d\n",
1198 raidPtr->raidid, d);
1199 *(int *) data = d;
1200 return (retcode);
1201
1202 case RAIDFRAME_SET_ROOT:
1203 d = rf_set_rootpartition(raidPtr, *(int *) data);
1204 printf("raid%d: New rootpartition value is: %d\n",
1205 raidPtr->raidid, d);
1206 *(int *) data = d;
1207 return (retcode);
1208
1209 /* initialize all parity */
1210 case RAIDFRAME_REWRITEPARITY:
1211
1212 if (raidPtr->Layout.map->faultsTolerated == 0) {
1213 /* Parity for RAID 0 is trivially correct */
1214 raidPtr->parity_good = RF_RAID_CLEAN;
1215 return(0);
1216 }
1217
1218 if (raidPtr->parity_rewrite_in_progress == 1) {
1219 /* Re-write is already in progress! */
1220 return(EINVAL);
1221 }
1222
1223 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1224 rf_RewriteParityThread,
1225 raidPtr,"raid_parity");
1226 return (retcode);
1227
1228
1229 case RAIDFRAME_ADD_HOT_SPARE:
1230 sparePtr = (RF_SingleComponent_t *) data;
1231 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1232 retcode = rf_add_hot_spare(raidPtr, &component);
1233 return(retcode);
1234
1235 case RAIDFRAME_REMOVE_HOT_SPARE:
1236 return(retcode);
1237
1238 case RAIDFRAME_DELETE_COMPONENT:
1239 componentPtr = (RF_SingleComponent_t *)data;
1240 memcpy( &component, componentPtr,
1241 sizeof(RF_SingleComponent_t));
1242 retcode = rf_delete_component(raidPtr, &component);
1243 return(retcode);
1244
1245 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1246 componentPtr = (RF_SingleComponent_t *)data;
1247 memcpy( &component, componentPtr,
1248 sizeof(RF_SingleComponent_t));
1249 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1250 return(retcode);
1251
1252 case RAIDFRAME_REBUILD_IN_PLACE:
1253
1254 if (raidPtr->Layout.map->faultsTolerated == 0) {
1255 /* Can't do this on a RAID 0!! */
1256 return(EINVAL);
1257 }
1258
1259 if (raidPtr->recon_in_progress == 1) {
1260 /* a reconstruct is already in progress! */
1261 return(EINVAL);
1262 }
1263
1264 componentPtr = (RF_SingleComponent_t *) data;
1265 memcpy( &component, componentPtr,
1266 sizeof(RF_SingleComponent_t));
1267 component.row = 0; /* we don't support any more */
1268 column = component.column;
1269
1270 if ((column < 0) || (column >= raidPtr->numCol)) {
1271 return(EINVAL);
1272 }
1273
1274 RF_LOCK_MUTEX(raidPtr->mutex);
1275 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1276 (raidPtr->numFailures > 0)) {
1277 /* XXX 0 above shouldn't be constant!!! */
1278 /* some component other than this has failed.
1279 Let's not make things worse than they already
1280 are... */
1281 printf("raid%d: Unable to reconstruct to disk at:\n",
1282 raidPtr->raidid);
1283 printf("raid%d: Col: %d Too many failures.\n",
1284 raidPtr->raidid, column);
1285 RF_UNLOCK_MUTEX(raidPtr->mutex);
1286 return (EINVAL);
1287 }
1288 if (raidPtr->Disks[column].status ==
1289 rf_ds_reconstructing) {
1290 printf("raid%d: Unable to reconstruct to disk at:\n",
1291 raidPtr->raidid);
1292 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1293
1294 RF_UNLOCK_MUTEX(raidPtr->mutex);
1295 return (EINVAL);
1296 }
1297 if (raidPtr->Disks[column].status == rf_ds_spared) {
1298 RF_UNLOCK_MUTEX(raidPtr->mutex);
1299 return (EINVAL);
1300 }
1301 RF_UNLOCK_MUTEX(raidPtr->mutex);
1302
1303 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1304 if (rrcopy == NULL)
1305 return(ENOMEM);
1306
1307 rrcopy->raidPtr = (void *) raidPtr;
1308 rrcopy->col = column;
1309
1310 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1311 rf_ReconstructInPlaceThread,
1312 rrcopy,"raid_reconip");
1313 return(retcode);
1314
1315 case RAIDFRAME_GET_INFO:
1316 if (!raidPtr->valid)
1317 return (ENODEV);
1318 ucfgp = (RF_DeviceConfig_t **) data;
1319 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1320 (RF_DeviceConfig_t *));
1321 if (d_cfg == NULL)
1322 return (ENOMEM);
1323 d_cfg->rows = 1; /* there is only 1 row now */
1324 d_cfg->cols = raidPtr->numCol;
1325 d_cfg->ndevs = raidPtr->numCol;
1326 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1327 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1328 return (ENOMEM);
1329 }
1330 d_cfg->nspares = raidPtr->numSpare;
1331 if (d_cfg->nspares >= RF_MAX_DISKS) {
1332 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1333 return (ENOMEM);
1334 }
1335 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1336 d = 0;
1337 for (j = 0; j < d_cfg->cols; j++) {
1338 d_cfg->devs[d] = raidPtr->Disks[j];
1339 d++;
1340 }
1341 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1342 d_cfg->spares[i] = raidPtr->Disks[j];
1343 }
1344 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1345 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1346
1347 return (retcode);
1348
1349 case RAIDFRAME_CHECK_PARITY:
1350 *(int *) data = raidPtr->parity_good;
1351 return (0);
1352
1353 case RAIDFRAME_RESET_ACCTOTALS:
1354 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1355 return (0);
1356
1357 case RAIDFRAME_GET_ACCTOTALS:
1358 totals = (RF_AccTotals_t *) data;
1359 *totals = raidPtr->acc_totals;
1360 return (0);
1361
1362 case RAIDFRAME_KEEP_ACCTOTALS:
1363 raidPtr->keep_acc_totals = *(int *)data;
1364 return (0);
1365
1366 case RAIDFRAME_GET_SIZE:
1367 *(int *) data = raidPtr->totalSectors;
1368 return (0);
1369
1370 /* fail a disk & optionally start reconstruction */
1371 case RAIDFRAME_FAIL_DISK:
1372
1373 if (raidPtr->Layout.map->faultsTolerated == 0) {
1374 /* Can't do this on a RAID 0!! */
1375 return(EINVAL);
1376 }
1377
1378 rr = (struct rf_recon_req *) data;
1379 rr->row = 0;
1380 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1381 return (EINVAL);
1382
1383
1384 RF_LOCK_MUTEX(raidPtr->mutex);
1385 if (raidPtr->status == rf_rs_reconstructing) {
1386 /* you can't fail a disk while we're reconstructing! */
1387 /* XXX wrong for RAID6 */
1388 RF_UNLOCK_MUTEX(raidPtr->mutex);
1389 return (EINVAL);
1390 }
1391 if ((raidPtr->Disks[rr->col].status ==
1392 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1393 /* some other component has failed. Let's not make
1394 things worse. XXX wrong for RAID6 */
1395 RF_UNLOCK_MUTEX(raidPtr->mutex);
1396 return (EINVAL);
1397 }
1398 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1399 /* Can't fail a spared disk! */
1400 RF_UNLOCK_MUTEX(raidPtr->mutex);
1401 return (EINVAL);
1402 }
1403 RF_UNLOCK_MUTEX(raidPtr->mutex);
1404
1405 /* make a copy of the recon request so that we don't rely on
1406 * the user's buffer */
1407 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1408 if (rrcopy == NULL)
1409 return(ENOMEM);
1410 memcpy(rrcopy, rr, sizeof(*rr));
1411 rrcopy->raidPtr = (void *) raidPtr;
1412
1413 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1414 rf_ReconThread,
1415 rrcopy,"raid_recon");
1416 return (0);
1417
1418 /* invoke a copyback operation after recon on whatever disk
1419 * needs it, if any */
1420 case RAIDFRAME_COPYBACK:
1421
1422 if (raidPtr->Layout.map->faultsTolerated == 0) {
1423 /* This makes no sense on a RAID 0!! */
1424 return(EINVAL);
1425 }
1426
1427 if (raidPtr->copyback_in_progress == 1) {
1428 /* Copyback is already in progress! */
1429 return(EINVAL);
1430 }
1431
1432 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1433 rf_CopybackThread,
1434 raidPtr,"raid_copyback");
1435 return (retcode);
1436
1437 /* return the percentage completion of reconstruction */
1438 case RAIDFRAME_CHECK_RECON_STATUS:
1439 if (raidPtr->Layout.map->faultsTolerated == 0) {
1440 /* This makes no sense on a RAID 0, so tell the
1441 user it's done. */
1442 *(int *) data = 100;
1443 return(0);
1444 }
1445 if (raidPtr->status != rf_rs_reconstructing)
1446 *(int *) data = 100;
1447 else {
1448 if (raidPtr->reconControl->numRUsTotal > 0) {
1449 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1450 } else {
1451 *(int *) data = 0;
1452 }
1453 }
1454 return (0);
1455 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1456 progressInfoPtr = (RF_ProgressInfo_t **) data;
1457 if (raidPtr->status != rf_rs_reconstructing) {
1458 progressInfo.remaining = 0;
1459 progressInfo.completed = 100;
1460 progressInfo.total = 100;
1461 } else {
1462 progressInfo.total =
1463 raidPtr->reconControl->numRUsTotal;
1464 progressInfo.completed =
1465 raidPtr->reconControl->numRUsComplete;
1466 progressInfo.remaining = progressInfo.total -
1467 progressInfo.completed;
1468 }
1469 retcode = copyout(&progressInfo, *progressInfoPtr,
1470 sizeof(RF_ProgressInfo_t));
1471 return (retcode);
1472
1473 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1474 if (raidPtr->Layout.map->faultsTolerated == 0) {
1475 /* This makes no sense on a RAID 0, so tell the
1476 user it's done. */
1477 *(int *) data = 100;
1478 return(0);
1479 }
1480 if (raidPtr->parity_rewrite_in_progress == 1) {
1481 *(int *) data = 100 *
1482 raidPtr->parity_rewrite_stripes_done /
1483 raidPtr->Layout.numStripe;
1484 } else {
1485 *(int *) data = 100;
1486 }
1487 return (0);
1488
1489 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1490 progressInfoPtr = (RF_ProgressInfo_t **) data;
1491 if (raidPtr->parity_rewrite_in_progress == 1) {
1492 progressInfo.total = raidPtr->Layout.numStripe;
1493 progressInfo.completed =
1494 raidPtr->parity_rewrite_stripes_done;
1495 progressInfo.remaining = progressInfo.total -
1496 progressInfo.completed;
1497 } else {
1498 progressInfo.remaining = 0;
1499 progressInfo.completed = 100;
1500 progressInfo.total = 100;
1501 }
1502 retcode = copyout(&progressInfo, *progressInfoPtr,
1503 sizeof(RF_ProgressInfo_t));
1504 return (retcode);
1505
1506 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1507 if (raidPtr->Layout.map->faultsTolerated == 0) {
1508 /* This makes no sense on a RAID 0 */
1509 *(int *) data = 100;
1510 return(0);
1511 }
1512 if (raidPtr->copyback_in_progress == 1) {
1513 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1514 raidPtr->Layout.numStripe;
1515 } else {
1516 *(int *) data = 100;
1517 }
1518 return (0);
1519
1520 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1521 progressInfoPtr = (RF_ProgressInfo_t **) data;
1522 if (raidPtr->copyback_in_progress == 1) {
1523 progressInfo.total = raidPtr->Layout.numStripe;
1524 progressInfo.completed =
1525 raidPtr->copyback_stripes_done;
1526 progressInfo.remaining = progressInfo.total -
1527 progressInfo.completed;
1528 } else {
1529 progressInfo.remaining = 0;
1530 progressInfo.completed = 100;
1531 progressInfo.total = 100;
1532 }
1533 retcode = copyout(&progressInfo, *progressInfoPtr,
1534 sizeof(RF_ProgressInfo_t));
1535 return (retcode);
1536
1537 /* the sparetable daemon calls this to wait for the kernel to
1538 * need a spare table. this ioctl does not return until a
1539 * spare table is needed. XXX -- calling mpsleep here in the
1540 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1541 * -- I should either compute the spare table in the kernel,
1542 * or have a different -- XXX XXX -- interface (a different
1543 * character device) for delivering the table -- XXX */
1544 #if 0
1545 case RAIDFRAME_SPARET_WAIT:
1546 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1547 while (!rf_sparet_wait_queue)
1548 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1549 waitreq = rf_sparet_wait_queue;
1550 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1551 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1552
1553 /* structure assignment */
1554 *((RF_SparetWait_t *) data) = *waitreq;
1555
1556 RF_Free(waitreq, sizeof(*waitreq));
1557 return (0);
1558
1559 /* wakes up a process waiting on SPARET_WAIT and puts an error
1560 * code in it that will cause the daemon to exit */
1561 case RAIDFRAME_ABORT_SPARET_WAIT:
1562 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1563 waitreq->fcol = -1;
1564 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1565 waitreq->next = rf_sparet_wait_queue;
1566 rf_sparet_wait_queue = waitreq;
1567 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1568 wakeup(&rf_sparet_wait_queue);
1569 return (0);
1570
1571 /* used by the spare table daemon to deliver a spare table
1572 * into the kernel */
1573 case RAIDFRAME_SEND_SPARET:
1574
1575 /* install the spare table */
1576 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1577
1578 /* respond to the requestor. the return status of the spare
1579 * table installation is passed in the "fcol" field */
1580 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1581 waitreq->fcol = retcode;
1582 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1583 waitreq->next = rf_sparet_resp_queue;
1584 rf_sparet_resp_queue = waitreq;
1585 wakeup(&rf_sparet_resp_queue);
1586 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1587
1588 return (retcode);
1589 #endif
1590
1591 default:
1592 break; /* fall through to the os-specific code below */
1593
1594 }
1595
1596 if (!raidPtr->valid)
1597 return (EINVAL);
1598
1599 /*
1600 * Add support for "regular" device ioctls here.
1601 */
1602
1603 switch (cmd) {
1604 case DIOCGDINFO:
1605 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1606 break;
1607 #ifdef __HAVE_OLD_DISKLABEL
1608 case ODIOCGDINFO:
1609 newlabel = *(rs->sc_dkdev.dk_label);
1610 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1611 return ENOTTY;
1612 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1613 break;
1614 #endif
1615
1616 case DIOCGPART:
1617 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1618 ((struct partinfo *) data)->part =
1619 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1620 break;
1621
1622 case DIOCWDINFO:
1623 case DIOCSDINFO:
1624 #ifdef __HAVE_OLD_DISKLABEL
1625 case ODIOCWDINFO:
1626 case ODIOCSDINFO:
1627 #endif
1628 {
1629 struct disklabel *lp;
1630 #ifdef __HAVE_OLD_DISKLABEL
1631 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1632 memset(&newlabel, 0, sizeof newlabel);
1633 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1634 lp = &newlabel;
1635 } else
1636 #endif
1637 lp = (struct disklabel *)data;
1638
1639 if ((error = raidlock(rs)) != 0)
1640 return (error);
1641
1642 rs->sc_flags |= RAIDF_LABELLING;
1643
1644 error = setdisklabel(rs->sc_dkdev.dk_label,
1645 lp, 0, rs->sc_dkdev.dk_cpulabel);
1646 if (error == 0) {
1647 if (cmd == DIOCWDINFO
1648 #ifdef __HAVE_OLD_DISKLABEL
1649 || cmd == ODIOCWDINFO
1650 #endif
1651 )
1652 error = writedisklabel(RAIDLABELDEV(dev),
1653 raidstrategy, rs->sc_dkdev.dk_label,
1654 rs->sc_dkdev.dk_cpulabel);
1655 }
1656 rs->sc_flags &= ~RAIDF_LABELLING;
1657
1658 raidunlock(rs);
1659
1660 if (error)
1661 return (error);
1662 break;
1663 }
1664
1665 case DIOCWLABEL:
1666 if (*(int *) data != 0)
1667 rs->sc_flags |= RAIDF_WLABEL;
1668 else
1669 rs->sc_flags &= ~RAIDF_WLABEL;
1670 break;
1671
1672 case DIOCGDEFLABEL:
1673 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1674 break;
1675
1676 #ifdef __HAVE_OLD_DISKLABEL
1677 case ODIOCGDEFLABEL:
1678 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1679 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1680 return ENOTTY;
1681 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1682 break;
1683 #endif
1684
1685 case DIOCAWEDGE:
1686 case DIOCDWEDGE:
1687 dkw = (void *)data;
1688
1689 /* If the ioctl happens here, the parent is us. */
1690 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1691 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1692
1693 case DIOCLWEDGES:
1694 return dkwedge_list(&rs->sc_dkdev,
1695 (struct dkwedge_list *)data, l);
1696
1697 default:
1698 retcode = ENOTTY;
1699 }
1700 return (retcode);
1701
1702 }
1703
1704
1705 /* raidinit -- complete the rest of the initialization for the
1706 RAIDframe device. */
1707
1708
1709 static void
1710 raidinit(RF_Raid_t *raidPtr)
1711 {
1712 struct cfdata *cf;
1713 struct raid_softc *rs;
1714 int unit;
1715
1716 unit = raidPtr->raidid;
1717
1718 rs = &raid_softc[unit];
1719
1720 /* XXX should check return code first... */
1721 rs->sc_flags |= RAIDF_INITED;
1722
1723 /* XXX doesn't check bounds. */
1724 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1725
1726 rs->sc_dkdev.dk_name = rs->sc_xname;
1727
1728 /* attach the pseudo device */
1729 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1730 cf->cf_name = raid_cd.cd_name;
1731 cf->cf_atname = raid_cd.cd_name;
1732 cf->cf_unit = unit;
1733 cf->cf_fstate = FSTATE_STAR;
1734
1735 rs->sc_dev = config_attach_pseudo(cf);
1736
1737 if (rs->sc_dev==NULL) {
1738 printf("raid%d: config_attach_pseudo failed\n",
1739 raidPtr->raidid);
1740 }
1741
1742 /* disk_attach actually creates space for the CPU disklabel, among
1743 * other things, so it's critical to call this *BEFORE* we try putzing
1744 * with disklabels. */
1745
1746 disk_attach(&rs->sc_dkdev);
1747
1748 /* XXX There may be a weird interaction here between this, and
1749 * protectedSectors, as used in RAIDframe. */
1750
1751 rs->sc_size = raidPtr->totalSectors;
1752 }
1753 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1754 /* wake up the daemon & tell it to get us a spare table
1755 * XXX
1756 * the entries in the queues should be tagged with the raidPtr
1757 * so that in the extremely rare case that two recons happen at once,
1758 * we know for which device were requesting a spare table
1759 * XXX
1760 *
1761 * XXX This code is not currently used. GO
1762 */
1763 int
1764 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1765 {
1766 int retcode;
1767
1768 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1769 req->next = rf_sparet_wait_queue;
1770 rf_sparet_wait_queue = req;
1771 wakeup(&rf_sparet_wait_queue);
1772
1773 /* mpsleep unlocks the mutex */
1774 while (!rf_sparet_resp_queue) {
1775 tsleep(&rf_sparet_resp_queue, PRIBIO,
1776 "raidframe getsparetable", 0);
1777 }
1778 req = rf_sparet_resp_queue;
1779 rf_sparet_resp_queue = req->next;
1780 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1781
1782 retcode = req->fcol;
1783 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1784 * alloc'd */
1785 return (retcode);
1786 }
1787 #endif
1788
1789 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1790 * bp & passes it down.
1791 * any calls originating in the kernel must use non-blocking I/O
1792 * do some extra sanity checking to return "appropriate" error values for
1793 * certain conditions (to make some standard utilities work)
1794 *
1795 * Formerly known as: rf_DoAccessKernel
1796 */
1797 void
1798 raidstart(RF_Raid_t *raidPtr)
1799 {
1800 RF_SectorCount_t num_blocks, pb, sum;
1801 RF_RaidAddr_t raid_addr;
1802 struct partition *pp;
1803 daddr_t blocknum;
1804 int unit;
1805 struct raid_softc *rs;
1806 int do_async;
1807 struct buf *bp;
1808 int rc;
1809
1810 unit = raidPtr->raidid;
1811 rs = &raid_softc[unit];
1812
1813 /* quick check to see if anything has died recently */
1814 RF_LOCK_MUTEX(raidPtr->mutex);
1815 if (raidPtr->numNewFailures > 0) {
1816 RF_UNLOCK_MUTEX(raidPtr->mutex);
1817 rf_update_component_labels(raidPtr,
1818 RF_NORMAL_COMPONENT_UPDATE);
1819 RF_LOCK_MUTEX(raidPtr->mutex);
1820 raidPtr->numNewFailures--;
1821 }
1822
1823 /* Check to see if we're at the limit... */
1824 while (raidPtr->openings > 0) {
1825 RF_UNLOCK_MUTEX(raidPtr->mutex);
1826
1827 /* get the next item, if any, from the queue */
1828 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1829 /* nothing more to do */
1830 return;
1831 }
1832
1833 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1834 * partition.. Need to make it absolute to the underlying
1835 * device.. */
1836
1837 blocknum = bp->b_blkno;
1838 if (DISKPART(bp->b_dev) != RAW_PART) {
1839 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1840 blocknum += pp->p_offset;
1841 }
1842
1843 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1844 (int) blocknum));
1845
1846 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1847 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1848
1849 /* *THIS* is where we adjust what block we're going to...
1850 * but DO NOT TOUCH bp->b_blkno!!! */
1851 raid_addr = blocknum;
1852
1853 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1854 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1855 sum = raid_addr + num_blocks + pb;
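/* pb accounts for a partial trailing sector; the range check below also
   catches arithmetic wraparound (sum < raid_addr, sum < num_blocks,
   sum < pb). */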
1856 if (1 || rf_debugKernelAccess) {
1857 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1858 (int) raid_addr, (int) sum, (int) num_blocks,
1859 (int) pb, (int) bp->b_resid));
1860 }
1861 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1862 || (sum < num_blocks) || (sum < pb)) {
1863 bp->b_error = ENOSPC;
1864 bp->b_flags |= B_ERROR;
1865 bp->b_resid = bp->b_bcount;
1866 biodone(bp);
1867 RF_LOCK_MUTEX(raidPtr->mutex);
1868 continue;
1869 }
1870 /*
1871 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1872 */
1873
1874 if (bp->b_bcount & raidPtr->sectorMask) {
1875 bp->b_error = EINVAL;
1876 bp->b_flags |= B_ERROR;
1877 bp->b_resid = bp->b_bcount;
1878 biodone(bp);
1879 RF_LOCK_MUTEX(raidPtr->mutex);
1880 continue;
1881
1882 }
1883 db1_printf(("Calling DoAccess..\n"));
1884
1885
1886 RF_LOCK_MUTEX(raidPtr->mutex);
1887 raidPtr->openings--;
1888 RF_UNLOCK_MUTEX(raidPtr->mutex);
1889
1890 /*
1891 * Everything is async.
1892 */
1893 do_async = 1;
1894
1895 disk_busy(&rs->sc_dkdev);
1896
1897 /* XXX we're still at splbio() here... do we *really*
1898 need to be? */
1899
1900 /* don't ever condition on bp->b_flags & B_WRITE.
1901 * always condition on B_READ instead */
1902
1903 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1904 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1905 do_async, raid_addr, num_blocks,
1906 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1907
1908 if (rc) {
1909 bp->b_error = rc;
1910 bp->b_flags |= B_ERROR;
1911 bp->b_resid = bp->b_bcount;
1912 biodone(bp);
1913 /* continue loop */
1914 }
1915
1916 RF_LOCK_MUTEX(raidPtr->mutex);
1917 }
1918 RF_UNLOCK_MUTEX(raidPtr->mutex);
1919 }
1920
1921
1922
1923
1924 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1925
1926 int
1927 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1928 {
1929 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1930 struct buf *bp;
1931
1932 req->queue = queue;
1933
1934 #if DIAGNOSTIC
1935 if (queue->raidPtr->raidid >= numraid) {
1936 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1937 numraid);
1938 panic("Invalid Unit number in rf_DispatchKernelIO");
1939 }
1940 #endif
1941
1942 bp = req->bp;
1943
1944 switch (req->type) {
1945 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1946 /* XXX need to do something extra here.. */
1947 /* I'm leaving this in, as I've never actually seen it used,
1948 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
1950 queue->numOutstanding++;
1951
1952 bp->b_flags = 0;
1953 bp->b_private = req;
1954
1955 KernelWakeupFunc(bp);
1956 break;
1957
1958 case RF_IO_TYPE_READ:
1959 case RF_IO_TYPE_WRITE:
1960 #if RF_ACC_TRACE > 0
1961 if (req->tracerec) {
1962 RF_ETIMER_START(req->tracerec->timer);
1963 }
1964 #endif
1965 InitBP(bp, queue->rf_cinfo->ci_vp,
1966 op, queue->rf_cinfo->ci_dev,
1967 req->sectorOffset, req->numSector,
1968 req->buf, KernelWakeupFunc, (void *) req,
1969 queue->raidPtr->logBytesPerSector, req->b_proc);
1970
1971 if (rf_debugKernelAccess) {
1972 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1973 (long) bp->b_blkno));
1974 }
1975 queue->numOutstanding++;
1976 queue->last_deq_sector = req->sectorOffset;
1977 /* acc wouldn't have been let in if there were any pending
1978 * reqs at any other priority */
1979 queue->curPriority = req->priority;
1980
1981 db1_printf(("Going for %c to unit %d col %d\n",
1982 req->type, queue->raidPtr->raidid,
1983 queue->col));
1984 db1_printf(("sector %d count %d (%d bytes) %d\n",
1985 (int) req->sectorOffset, (int) req->numSector,
1986 (int) (req->numSector <<
1987 queue->raidPtr->logBytesPerSector),
1988 (int) queue->raidPtr->logBytesPerSector));
1989 VOP_STRATEGY(bp->b_vp, bp);
1990
1991 break;
1992
1993 default:
1994 panic("bad req->type in rf_DispatchKernelIO");
1995 }
1996 db1_printf(("Exiting from DispatchKernelIO\n"));
1997
1998 return (0);
1999 }
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
2003 static void
2004 KernelWakeupFunc(struct buf *bp)
2005 {
2006 RF_DiskQueueData_t *req = NULL;
2007 RF_DiskQueue_t *queue;
2008 int s;
2009
2010 s = splbio();
2011 db1_printf(("recovering the request queue:\n"));
2012 req = bp->b_private;
2013
2014 queue = (RF_DiskQueue_t *) req->queue;
2015
2016 #if RF_ACC_TRACE > 0
2017 if (req->tracerec) {
2018 RF_ETIMER_STOP(req->tracerec->timer);
2019 RF_ETIMER_EVAL(req->tracerec->timer);
2020 RF_LOCK_MUTEX(rf_tracing_mutex);
2021 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2022 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2023 req->tracerec->num_phys_ios++;
2024 RF_UNLOCK_MUTEX(rf_tracing_mutex);
2025 }
2026 #endif
2027
2028 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
2029 * ballistic, and mark the component as hosed... */
2030
2031 if (bp->b_flags & B_ERROR) {
2032 /* Mark the disk as dead */
2033 /* but only mark it once... */
2034 /* and only if it wouldn't leave this RAID set
2035 completely broken */
2036 if (((queue->raidPtr->Disks[queue->col].status ==
2037 rf_ds_optimal) ||
2038 (queue->raidPtr->Disks[queue->col].status ==
2039 rf_ds_used_spare)) &&
2040 (queue->raidPtr->numFailures <
2041 queue->raidPtr->Layout.map->faultsTolerated)) {
2042 printf("raid%d: IO Error. Marking %s as failed.\n",
2043 queue->raidPtr->raidid,
2044 queue->raidPtr->Disks[queue->col].devname);
2045 queue->raidPtr->Disks[queue->col].status =
2046 rf_ds_failed;
2047 queue->raidPtr->status = rf_rs_degraded;
2048 queue->raidPtr->numFailures++;
2049 queue->raidPtr->numNewFailures++;
2050 } else { /* Disk is already dead... */
2051 /* printf("Disk already marked as dead!\n"); */
2052 }
2053
2054 }
2055
2056 /* Fill in the error value */
2057
2058 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
2059
2060 simple_lock(&queue->raidPtr->iodone_lock);
2061
2062 /* Drop this one on the "finished" queue... */
2063 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2064
2065 /* Let the raidio thread know there is work to be done. */
2066 wakeup(&(queue->raidPtr->iodone));
2067
2068 simple_unlock(&queue->raidPtr->iodone_lock);
2069
2070 splx(s);
2071 }
2072
2073
2074
2075 /*
2076 * initialize a buf structure for doing an I/O in the kernel.
2077 */
2078 static void
2079 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2080 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2081 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2082 struct proc *b_proc)
2083 {
2084 /* bp->b_flags = B_PHYS | rw_flag; */
2085 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2086 bp->b_bcount = numSect << logBytesPerSector;
2087 bp->b_bufsize = bp->b_bcount;
2088 bp->b_error = 0;
2089 bp->b_dev = dev;
2090 bp->b_data = bf;
2091 bp->b_blkno = startSect;
2092 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2093 if (bp->b_bcount == 0) {
2094 panic("bp->b_bcount is zero in InitBP!!");
2095 }
2096 bp->b_proc = b_proc;
2097 bp->b_iodone = cbFunc;
2098 bp->b_private = cbArg;
2099 bp->b_vp = b_vp;
2100 if ((bp->b_flags & B_READ) == 0) {
2101 bp->b_vp->v_numoutput++;
2102 }
2103
2104 }
2105
2106 static void
2107 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2108 struct disklabel *lp)
2109 {
2110 memset(lp, 0, sizeof(*lp));
2111
2112 /* fabricate a label... */
2113 lp->d_secperunit = raidPtr->totalSectors;
2114 lp->d_secsize = raidPtr->bytesPerSector;
2115 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2116 lp->d_ntracks = 4 * raidPtr->numCol;
2117 lp->d_ncylinders = raidPtr->totalSectors /
2118 (lp->d_nsectors * lp->d_ntracks);
2119 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2120
2121 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2122 lp->d_type = DTYPE_RAID;
2123 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2124 lp->d_rpm = 3600;
2125 lp->d_interleave = 1;
2126 lp->d_flags = 0;
2127
2128 lp->d_partitions[RAW_PART].p_offset = 0;
2129 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2130 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2131 lp->d_npartitions = RAW_PART + 1;
2132
2133 lp->d_magic = DISKMAGIC;
2134 lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
2136
2137 }
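/*
 * Example of the fabricated geometry (numbers are illustrative only):
 * a 5-column set whose stripes hold 128 data sectors would get
 * d_nsectors = 128, d_ntracks = 4 * 5 = 20, d_secpercyl = 2560 and
 * d_ncylinders = totalSectors / 2560.  The geometry is essentially
 * fictitious (the packname above even says so); it mainly exists so
 * that disklabel-driven tools have something sane to chew on.
 */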
2138 /*
2139 * Read the disklabel from the raid device. If one is not present, fake one
2140 * up.
2141 */
2142 static void
2143 raidgetdisklabel(dev_t dev)
2144 {
2145 int unit = raidunit(dev);
2146 struct raid_softc *rs = &raid_softc[unit];
2147 const char *errstring;
2148 struct disklabel *lp = rs->sc_dkdev.dk_label;
2149 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2150 RF_Raid_t *raidPtr;
2151
2152 db1_printf(("Getting the disklabel...\n"));
2153
2154 memset(clp, 0, sizeof(*clp));
2155
2156 raidPtr = raidPtrs[unit];
2157
2158 raidgetdefaultlabel(raidPtr, rs, lp);
2159
2160 /*
2161 * Call the generic disklabel extraction routine.
2162 */
2163 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2164 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2165 if (errstring)
2166 raidmakedisklabel(rs);
2167 else {
2168 int i;
2169 struct partition *pp;
2170
2171 /*
2172 * Sanity check whether the found disklabel is valid.
2173 *
		 * This is necessary since the total size of the raid device
		 * may vary when the interleave is changed even though exactly
		 * the same components are used, and the old disklabel may be
		 * used if one is found.
2178 */
2179 if (lp->d_secperunit != rs->sc_size)
2180 printf("raid%d: WARNING: %s: "
2181 "total sector size in disklabel (%d) != "
2182 "the size of raid (%ld)\n", unit, rs->sc_xname,
2183 lp->d_secperunit, (long) rs->sc_size);
2184 for (i = 0; i < lp->d_npartitions; i++) {
2185 pp = &lp->d_partitions[i];
2186 if (pp->p_offset + pp->p_size > rs->sc_size)
2187 printf("raid%d: WARNING: %s: end of partition `%c' "
2188 "exceeds the size of raid (%ld)\n",
2189 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2190 }
2191 }
2192
2193 }
2194 /*
2195 * Take care of things one might want to take care of in the event
2196 * that a disklabel isn't present.
2197 */
2198 static void
2199 raidmakedisklabel(struct raid_softc *rs)
2200 {
2201 struct disklabel *lp = rs->sc_dkdev.dk_label;
2202 db1_printf(("Making a label..\n"));
2203
2204 /*
2205 * For historical reasons, if there's no disklabel present
2206 * the raw partition must be marked FS_BSDFFS.
2207 */
2208
2209 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2210
2211 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2212
2213 lp->d_checksum = dkcksum(lp);
2214 }
2215 /*
2216 * Wait interruptibly for an exclusive lock.
2217 *
2218 * XXX
2219 * Several drivers do this; it should be abstracted and made MP-safe.
2220 * (Hmm... where have we seen this warning before :-> GO )
2221 */
2222 static int
2223 raidlock(struct raid_softc *rs)
2224 {
2225 int error;
2226
2227 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2228 rs->sc_flags |= RAIDF_WANTED;
2229 if ((error =
2230 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2231 return (error);
2232 }
2233 rs->sc_flags |= RAIDF_LOCKED;
2234 return (0);
2235 }
2236 /*
2237 * Unlock and wake up any waiters.
2238 */
2239 static void
2240 raidunlock(struct raid_softc *rs)
2241 {
2242
2243 rs->sc_flags &= ~RAIDF_LOCKED;
2244 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2245 rs->sc_flags &= ~RAIDF_WANTED;
2246 wakeup(rs);
2247 }
2248 }
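/*
 * Illustrative usage of the soft lock above (this is the pattern the
 * open/close/ioctl paths follow):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... muck with the unit ...
 *	raidunlock(rs);
 */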
2249
2250
2251 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2252 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2253
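/*
 * The component label lives inside the region RAIDframe reserves at
 * the front of each component: it starts RF_COMPONENT_INFO_OFFSET
 * bytes in (i.e. sector 32 for DEV_BSIZE == 512, as computed below)
 * and is RF_COMPONENT_INFO_SIZE bytes long.
 */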
2254 int
2255 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2256 {
2257 RF_ComponentLabel_t clabel;
2258 raidread_component_label(dev, b_vp, &clabel);
2259 clabel.mod_counter = mod_counter;
2260 clabel.clean = RF_RAID_CLEAN;
2261 raidwrite_component_label(dev, b_vp, &clabel);
2262 return(0);
2263 }
2264
2265
2266 int
2267 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2268 {
2269 RF_ComponentLabel_t clabel;
2270 raidread_component_label(dev, b_vp, &clabel);
2271 clabel.mod_counter = mod_counter;
2272 clabel.clean = RF_RAID_DIRTY;
2273 raidwrite_component_label(dev, b_vp, &clabel);
2274 return(0);
2275 }
2276
2277 /* ARGSUSED */
2278 int
2279 raidread_component_label(dev_t dev, struct vnode *b_vp,
2280 RF_ComponentLabel_t *clabel)
2281 {
2282 struct buf *bp;
2283 const struct bdevsw *bdev;
2284 int error;
2285
	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protectedSectors. */
2288
2289 if (b_vp == NULL) {
2290 /* For whatever reason, this component is not valid.
2291 Don't try to read a component label from it. */
2292 return(EINVAL);
2293 }
2294
2295 /* get a block of the appropriate size... */
2296 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2297 bp->b_dev = dev;
2298
2299 /* get our ducks in a row for the read */
2300 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2301 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2302 bp->b_flags |= B_READ;
2303 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2304
	bdev = bdevsw_lookup(bp->b_dev);
	if (bdev == NULL) {
		brelse(bp);
		return (ENXIO);
	}
2308 (*bdev->d_strategy)(bp);
2309
2310 error = biowait(bp);
2311
2312 if (!error) {
2313 memcpy(clabel, bp->b_data,
2314 sizeof(RF_ComponentLabel_t));
2315 }
2316
2317 brelse(bp);
2318 return(error);
2319 }
2320 /* ARGSUSED */
2321 int
2322 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2323 RF_ComponentLabel_t *clabel)
2324 {
2325 struct buf *bp;
2326 const struct bdevsw *bdev;
2327 int error;
2328
2329 /* get a block of the appropriate size... */
2330 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2331 bp->b_dev = dev;
2332
2333 /* get our ducks in a row for the write */
2334 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2335 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2336 bp->b_flags |= B_WRITE;
2337 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2338
2339 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2340
2341 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2342
	bdev = bdevsw_lookup(bp->b_dev);
	if (bdev == NULL) {
		brelse(bp);
		return (ENXIO);
	}
2346 (*bdev->d_strategy)(bp);
2347 error = biowait(bp);
2348 brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}
2354
2355 return(error);
2356 }
2357
2358 void
2359 rf_markalldirty(RF_Raid_t *raidPtr)
2360 {
2361 RF_ComponentLabel_t clabel;
2362 int sparecol;
2363 int c;
2364 int j;
2365 int scol = -1;
2366
2367 raidPtr->mod_counter++;
2368 for (c = 0; c < raidPtr->numCol; c++) {
2369 /* we don't want to touch (at all) a disk that has
2370 failed */
2371 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2372 raidread_component_label(
2373 raidPtr->Disks[c].dev,
2374 raidPtr->raid_cinfo[c].ci_vp,
2375 &clabel);
2376 if (clabel.status == rf_ds_spared) {
2377 /* XXX do something special...
2378 but whatever you do, don't
2379 try to access it!! */
2380 } else {
2381 raidmarkdirty(
2382 raidPtr->Disks[c].dev,
2383 raidPtr->raid_cinfo[c].ci_vp,
2384 raidPtr->mod_counter);
2385 }
2386 }
2387 }
2388
2389 for( c = 0; c < raidPtr->numSpare ; c++) {
2390 sparecol = raidPtr->numCol + c;
2391 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*
			 * We claim this disk is "optimal" if it's
			 * rf_ds_used_spare, as that means it should be
			 * directly substitutable for the disk it replaced.
			 * We note that too...
			 */
2400
2401 for(j=0;j<raidPtr->numCol;j++) {
2402 if (raidPtr->Disks[j].spareCol == sparecol) {
2403 scol = j;
2404 break;
2405 }
2406 }
2407
2408 raidread_component_label(
2409 raidPtr->Disks[sparecol].dev,
2410 raidPtr->raid_cinfo[sparecol].ci_vp,
2411 &clabel);
2412 /* make sure status is noted */
2413
2414 raid_init_component_label(raidPtr, &clabel);
2415
2416 clabel.row = 0;
2417 clabel.column = scol;
2418 /* Note: we *don't* change status from rf_ds_used_spare
2419 to rf_ds_optimal */
2420 /* clabel.status = rf_ds_optimal; */
2421
2422 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2423 raidPtr->raid_cinfo[sparecol].ci_vp,
2424 raidPtr->mod_counter);
2425 }
2426 }
2427 }
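/*
 * Note on the clean/dirty protocol: components get marked dirty here
 * (when a set is configured) and are only marked clean again by
 * rf_update_component_labels() below, and then only on a final update
 * with parity known good.  A set whose labels are still dirty at
 * configuration time can thus assume its parity needs checking.
 */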
2428
2429
2430 void
2431 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2432 {
2433 RF_ComponentLabel_t clabel;
2434 int sparecol;
2435 int c;
2436 int j;
2437 int scol;
2438
2439 scol = -1;
2440
2441 /* XXX should do extra checks to make sure things really are clean,
2442 rather than blindly setting the clean bit... */
2443
2444 raidPtr->mod_counter++;
2445
2446 for (c = 0; c < raidPtr->numCol; c++) {
2447 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2448 raidread_component_label(
2449 raidPtr->Disks[c].dev,
2450 raidPtr->raid_cinfo[c].ci_vp,
2451 &clabel);
2452 /* make sure status is noted */
2453 clabel.status = rf_ds_optimal;
2454
2455 /* bump the counter */
2456 clabel.mod_counter = raidPtr->mod_counter;
2457
2458 /* note what unit we are configured as */
2459 clabel.last_unit = raidPtr->raidid;
2460
2461 raidwrite_component_label(
2462 raidPtr->Disks[c].dev,
2463 raidPtr->raid_cinfo[c].ci_vp,
2464 &clabel);
2465 if (final == RF_FINAL_COMPONENT_UPDATE) {
2466 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2467 raidmarkclean(
2468 raidPtr->Disks[c].dev,
2469 raidPtr->raid_cinfo[c].ci_vp,
2470 raidPtr->mod_counter);
2471 }
2472 }
2473 }
2474 /* else we don't touch it.. */
2475 }
2476
2477 for( c = 0; c < raidPtr->numSpare ; c++) {
2478 sparecol = raidPtr->numCol + c;
2479 /* Need to ensure that the reconstruct actually completed! */
2480 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*
			 * We claim this disk is "optimal" if it's
			 * rf_ds_used_spare, as that means it should be
			 * directly substitutable for the disk it replaced.
			 * We note that too...
			 */
2489
2490 for(j=0;j<raidPtr->numCol;j++) {
2491 if (raidPtr->Disks[j].spareCol == sparecol) {
2492 scol = j;
2493 break;
2494 }
2495 }
2496
2497 /* XXX shouldn't *really* need this... */
2498 raidread_component_label(
2499 raidPtr->Disks[sparecol].dev,
2500 raidPtr->raid_cinfo[sparecol].ci_vp,
2501 &clabel);
2502 /* make sure status is noted */
2503
2504 raid_init_component_label(raidPtr, &clabel);
2505
2506 clabel.mod_counter = raidPtr->mod_counter;
2507 clabel.column = scol;
2508 clabel.status = rf_ds_optimal;
2509 clabel.last_unit = raidPtr->raidid;
2510
2511 raidwrite_component_label(
2512 raidPtr->Disks[sparecol].dev,
2513 raidPtr->raid_cinfo[sparecol].ci_vp,
2514 &clabel);
2515 if (final == RF_FINAL_COMPONENT_UPDATE) {
2516 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2517 raidmarkclean( raidPtr->Disks[sparecol].dev,
2518 raidPtr->raid_cinfo[sparecol].ci_vp,
2519 raidPtr->mod_counter);
2520 }
2521 }
2522 }
2523 }
2524 }
2525
2526 void
2527 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2528 {
2529 struct proc *p;
2530 struct lwp *l;
2531
2532 p = raidPtr->engine_thread;
2533 l = LIST_FIRST(&p->p_lwps);
2534
2535 if (vp != NULL) {
2536 if (auto_configured == 1) {
2537 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2538 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2539 vput(vp);
2540
2541 } else {
2542 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2543 }
2544 }
2545 }
2546
2547
2548 void
2549 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2550 {
2551 int r,c;
2552 struct vnode *vp;
2553 int acd;
2554
2555
2556 /* We take this opportunity to close the vnodes like we should.. */
2557
2558 for (c = 0; c < raidPtr->numCol; c++) {
2559 vp = raidPtr->raid_cinfo[c].ci_vp;
2560 acd = raidPtr->Disks[c].auto_configured;
2561 rf_close_component(raidPtr, vp, acd);
2562 raidPtr->raid_cinfo[c].ci_vp = NULL;
2563 raidPtr->Disks[c].auto_configured = 0;
2564 }
2565
2566 for (r = 0; r < raidPtr->numSpare; r++) {
2567 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2568 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2569 rf_close_component(raidPtr, vp, acd);
2570 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2571 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2572 }
2573 }
2574
2575
2576 void
2577 rf_ReconThread(struct rf_recon_req *req)
2578 {
2579 int s;
2580 RF_Raid_t *raidPtr;
2581
2582 s = splbio();
2583 raidPtr = (RF_Raid_t *) req->raidPtr;
2584 raidPtr->recon_in_progress = 1;
2585
2586 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2587 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2588
2589 RF_Free(req, sizeof(*req));
2590
2591 raidPtr->recon_in_progress = 0;
2592 splx(s);
2593
2594 /* That's all... */
2595 kthread_exit(0); /* does not return */
2596 }
2597
2598 void
2599 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2600 {
2601 int retcode;
2602 int s;
2603
2604 raidPtr->parity_rewrite_stripes_done = 0;
2605 raidPtr->parity_rewrite_in_progress = 1;
2606 s = splbio();
2607 retcode = rf_RewriteParity(raidPtr);
2608 splx(s);
2609 if (retcode) {
2610 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2611 } else {
2612 /* set the clean bit! If we shutdown correctly,
2613 the clean bit on each component label will get
2614 set */
2615 raidPtr->parity_good = RF_RAID_CLEAN;
2616 }
2617 raidPtr->parity_rewrite_in_progress = 0;
2618
2619 /* Anyone waiting for us to stop? If so, inform them... */
2620 if (raidPtr->waitShutdown) {
2621 wakeup(&raidPtr->parity_rewrite_in_progress);
2622 }
2623
2624 /* That's all... */
2625 kthread_exit(0); /* does not return */
2626 }
2627
2628
2629 void
2630 rf_CopybackThread(RF_Raid_t *raidPtr)
2631 {
2632 int s;
2633
2634 raidPtr->copyback_in_progress = 1;
2635 s = splbio();
2636 rf_CopybackReconstructedData(raidPtr);
2637 splx(s);
2638 raidPtr->copyback_in_progress = 0;
2639
2640 /* That's all... */
2641 kthread_exit(0); /* does not return */
2642 }
2643
2644
2645 void
2646 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2647 {
2648 int s;
2649 RF_Raid_t *raidPtr;
2650
2651 s = splbio();
2652 raidPtr = req->raidPtr;
2653 raidPtr->recon_in_progress = 1;
2654 rf_ReconstructInPlace(raidPtr, req->col);
2655 RF_Free(req, sizeof(*req));
2656 raidPtr->recon_in_progress = 0;
2657 splx(s);
2658
2659 /* That's all... */
2660 kthread_exit(0); /* does not return */
2661 }
2662
2663 static RF_AutoConfig_t *
2664 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2665 const char *cname, RF_SectorCount_t size)
2666 {
2667 int good_one = 0;
2668 RF_ComponentLabel_t *clabel;
2669 RF_AutoConfig_t *ac;
2670
2671 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2672 if (clabel == NULL) {
2673 oomem:
2674 while(ac_list) {
2675 ac = ac_list;
2676 if (ac->clabel)
2677 free(ac->clabel, M_RAIDFRAME);
2678 ac_list = ac_list->next;
2679 free(ac, M_RAIDFRAME);
2680 }
2681 printf("RAID auto config: out of memory!\n");
2682 return NULL; /* XXX probably should panic? */
2683 }
2684
2685 if (!raidread_component_label(dev, vp, clabel)) {
2686 /* Got the label. Does it look reasonable? */
2687 if (rf_reasonable_label(clabel) &&
2688 (clabel->partitionSize <= size)) {
2689 #ifdef DEBUG
2690 printf("Component on: %s: %llu\n",
2691 cname, (unsigned long long)size);
2692 rf_print_component_label(clabel);
2693 #endif
2694 /* if it's reasonable, add it, else ignore it. */
2695 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2696 M_NOWAIT);
2697 if (ac == NULL) {
2698 free(clabel, M_RAIDFRAME);
2699 goto oomem;
2700 }
2701 strlcpy(ac->devname, cname, sizeof(ac->devname));
2702 ac->dev = dev;
2703 ac->vp = vp;
2704 ac->clabel = clabel;
2705 ac->next = ac_list;
2706 ac_list = ac;
2707 good_one = 1;
2708 }
2709 }
2710 if (!good_one) {
2711 /* cleanup */
2712 free(clabel, M_RAIDFRAME);
2713 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2714 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2715 vput(vp);
2716 }
2717 return ac_list;
2718 }
2719
2720 RF_AutoConfig_t *
rf_find_raid_components(void)
2722 {
2723 struct vnode *vp;
2724 struct disklabel label;
2725 struct device *dv;
2726 dev_t dev;
2727 int bmajor, bminor, wedge;
2728 int error;
2729 int i;
2730 RF_AutoConfig_t *ac_list;
2731
2732
2733 /* initialize the AutoConfig list */
2734 ac_list = NULL;
2735
2736 /* we begin by trolling through *all* the devices on the system */
2737
2738 for (dv = alldevs.tqh_first; dv != NULL;
2739 dv = dv->dv_list.tqe_next) {
2740
2741 /* we are only interested in disks... */
2742 if (device_class(dv) != DV_DISK)
2743 continue;
2744
2745 /* we don't care about floppies... */
2746 if (device_is_a(dv, "fd")) {
2747 continue;
2748 }
2749
2750 /* we don't care about CD's... */
2751 if (device_is_a(dv, "cd")) {
2752 continue;
2753 }
2754
2755 /* hdfd is the Atari/Hades floppy driver */
2756 if (device_is_a(dv, "hdfd")) {
2757 continue;
2758 }
2759
2760 /* fdisa is the Atari/Milan floppy driver */
2761 if (device_is_a(dv, "fdisa")) {
2762 continue;
2763 }
2764
2765 /* need to find the device_name_to_block_device_major stuff */
2766 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2767
2768 /* get a vnode for the raw partition of this disk */
2769
2770 wedge = device_is_a(dv, "dk");
2771 bminor = minor(device_unit(dv));
2772 dev = wedge ? makedev(bmajor, bminor) :
2773 MAKEDISKDEV(bmajor, bminor, RAW_PART);
2774 if (bdevvp(dev, &vp))
2775 panic("RAID can't alloc vnode");
2776
2777 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2778
2779 if (error) {
2780 /* "Who cares." Continue looking
2781 for something that exists*/
2782 vput(vp);
2783 continue;
2784 }
2785
2786 if (wedge) {
2787 struct dkwedge_info dkw;
2788 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2789 NOCRED, 0);
2790 if (error) {
2791 printf("RAIDframe: can't get wedge info for "
2792 "dev %s (%d)\n", dv->dv_xname, error);
2793 out:
2794 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2795 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2796 vput(vp);
2797 continue;
2798 }
2799
2800 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
2801 goto out;
2802
2803 ac_list = rf_get_component(ac_list, dev, vp,
2804 dv->dv_xname, dkw.dkw_size);
2805 continue;
2806 }
2807
2808 /* Ok, the disk exists. Go get the disklabel. */
2809 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
2810 if (error) {
2811 /*
2812 * XXX can't happen - open() would
2813 * have errored out (or faked up one)
2814 */
2815 if (error != ENOTTY)
2816 printf("RAIDframe: can't get label for dev "
2817 "%s (%d)\n", dv->dv_xname, error);
2818 }
2819
2820 /* don't need this any more. We'll allocate it again
2821 a little later if we really do... */
2822 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2823 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2824 vput(vp);
2825
2826 if (error)
2827 continue;
2828
2829 for (i = 0; i < label.d_npartitions; i++) {
2830 char cname[sizeof(ac_list->devname)];
2831
2832 /* We only support partitions marked as RAID */
2833 if (label.d_partitions[i].p_fstype != FS_RAID)
2834 continue;
2835
2836 dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2837 if (bdevvp(dev, &vp))
2838 panic("RAID can't alloc vnode");
2839
2840 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2841 if (error) {
2842 /* Whatever... */
2843 vput(vp);
2844 continue;
2845 }
2846 snprintf(cname, sizeof(cname), "%s%c",
2847 dv->dv_xname, 'a' + i);
2848 ac_list = rf_get_component(ac_list, dev, vp, cname,
2849 label.d_partitions[i].p_size);
2850 }
2851 }
2852 return ac_list;
2853 }
2854
2855
2856 static int
2857 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2858 {
2859
2860 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2861 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2862 ((clabel->clean == RF_RAID_CLEAN) ||
2863 (clabel->clean == RF_RAID_DIRTY)) &&
2864 clabel->row >=0 &&
2865 clabel->column >= 0 &&
2866 clabel->num_rows > 0 &&
2867 clabel->num_columns > 0 &&
2868 clabel->row < clabel->num_rows &&
2869 clabel->column < clabel->num_columns &&
2870 clabel->blockSize > 0 &&
2871 clabel->numBlocks > 0) {
2872 /* label looks reasonable enough... */
2873 return(1);
2874 }
2875 return(0);
2876 }
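/*
 * Note that partitionSize is not checked here; the caller
 * (rf_get_component() above) separately insists that the label's
 * partitionSize not exceed the size of the partition it was read from.
 */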
2877
2878
2879 #ifdef DEBUG
2880 void
2881 rf_print_component_label(RF_ComponentLabel_t *clabel)
2882 {
2883 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2884 clabel->row, clabel->column,
2885 clabel->num_rows, clabel->num_columns);
2886 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2887 clabel->version, clabel->serial_number,
2888 clabel->mod_counter);
2889 printf(" Clean: %s Status: %d\n",
2890 clabel->clean ? "Yes" : "No", clabel->status );
2891 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2892 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2893 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2894 (char) clabel->parityConfig, clabel->blockSize,
2895 clabel->numBlocks);
2896 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2897 printf(" Contains root partition: %s\n",
2898 clabel->root_partition ? "Yes" : "No" );
2899 printf(" Last configured as: raid%d\n", clabel->last_unit );
2900 #if 0
2901 printf(" Config order: %d\n", clabel->config_order);
2902 #endif
2903
2904 }
2905 #endif
2906
2907 RF_ConfigSet_t *
2908 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2909 {
2910 RF_AutoConfig_t *ac;
2911 RF_ConfigSet_t *config_sets;
2912 RF_ConfigSet_t *cset;
2913 RF_AutoConfig_t *ac_next;
2914
2915
2916 config_sets = NULL;
2917
2918 /* Go through the AutoConfig list, and figure out which components
2919 belong to what sets. */
2920 ac = ac_list;
2921 while(ac!=NULL) {
2922 /* we're going to putz with ac->next, so save it here
2923 for use at the end of the loop */
2924 ac_next = ac->next;
2925
2926 if (config_sets == NULL) {
2927 /* will need at least this one... */
2928 config_sets = (RF_ConfigSet_t *)
2929 malloc(sizeof(RF_ConfigSet_t),
2930 M_RAIDFRAME, M_NOWAIT);
2931 if (config_sets == NULL) {
2932 panic("rf_create_auto_sets: No memory!");
2933 }
2934 /* this one is easy :) */
2935 config_sets->ac = ac;
2936 config_sets->next = NULL;
2937 config_sets->rootable = 0;
2938 ac->next = NULL;
2939 } else {
2940 /* which set does this component fit into? */
2941 cset = config_sets;
2942 while(cset!=NULL) {
2943 if (rf_does_it_fit(cset, ac)) {
2944 /* looks like it matches... */
2945 ac->next = cset->ac;
2946 cset->ac = ac;
2947 break;
2948 }
2949 cset = cset->next;
2950 }
2951 if (cset==NULL) {
2952 /* didn't find a match above... new set..*/
2953 cset = (RF_ConfigSet_t *)
2954 malloc(sizeof(RF_ConfigSet_t),
2955 M_RAIDFRAME, M_NOWAIT);
2956 if (cset == NULL) {
2957 panic("rf_create_auto_sets: No memory!");
2958 }
2959 cset->ac = ac;
2960 ac->next = NULL;
2961 cset->next = config_sets;
2962 cset->rootable = 0;
2963 config_sets = cset;
2964 }
2965 }
2966 ac = ac_next;
2967 }
2968
2969
2970 return(config_sets);
2971 }
2972
2973 static int
2974 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2975 {
2976 RF_ComponentLabel_t *clabel1, *clabel2;
2977
2978 /* If this one matches the *first* one in the set, that's good
2979 enough, since the other members of the set would have been
2980 through here too... */
2981 /* note that we are not checking partitionSize here..
2982
2983 Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
2985 good enough for this test. We will deal with the mod_counters
2986 a little later in the autoconfiguration process.
2987
2988 (clabel1->mod_counter == clabel2->mod_counter) &&
2989
2990 The reason we don't check for this is that failed disks
2991 will have lower modification counts. If those disks are
2992 not added to the set they used to belong to, then they will
2993 form their own set, which may result in 2 different sets,
2994 for example, competing to be configured at raid0, and
2995 perhaps competing to be the root filesystem set. If the
2996 wrong ones get configured, or both attempt to become /,
	   weird behaviour and/or serious lossage will occur.  Thus we
2998 need to bring them into the fold here, and kick them out at
2999 a later point.
3000
3001 */
3002
3003 clabel1 = cset->ac->clabel;
3004 clabel2 = ac->clabel;
3005 if ((clabel1->version == clabel2->version) &&
3006 (clabel1->serial_number == clabel2->serial_number) &&
3007 (clabel1->num_rows == clabel2->num_rows) &&
3008 (clabel1->num_columns == clabel2->num_columns) &&
3009 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3010 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3011 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3012 (clabel1->parityConfig == clabel2->parityConfig) &&
3013 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3014 (clabel1->blockSize == clabel2->blockSize) &&
3015 (clabel1->numBlocks == clabel2->numBlocks) &&
3016 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3017 (clabel1->root_partition == clabel2->root_partition) &&
3018 (clabel1->last_unit == clabel2->last_unit) &&
3019 (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
3021 } else {
3022 /* it's not consistent with somebody in the set..
3023 punt */
3024 return(0);
3025 }
3026 /* all was fine.. it must fit... */
3027 return(1);
3028 }
3029
3030 int
3031 rf_have_enough_components(RF_ConfigSet_t *cset)
3032 {
3033 RF_AutoConfig_t *ac;
3034 RF_AutoConfig_t *auto_config;
3035 RF_ComponentLabel_t *clabel;
3036 int c;
3037 int num_cols;
3038 int num_missing;
3039 int mod_counter;
3040 int mod_counter_found;
3041 int even_pair_failed;
3042 char parity_type;
3043
3044
3045 /* check to see that we have enough 'live' components
3046 of this set. If so, we can configure it if necessary */
3047
3048 num_cols = cset->ac->clabel->num_columns;
3049 parity_type = cset->ac->clabel->parityConfig;
3050
3051 /* XXX Check for duplicate components!?!?!? */
3052
3053 /* Determine what the mod_counter is supposed to be for this set. */
3054
3055 mod_counter_found = 0;
3056 mod_counter = 0;
3057 ac = cset->ac;
3058 while(ac!=NULL) {
3059 if (mod_counter_found==0) {
3060 mod_counter = ac->clabel->mod_counter;
3061 mod_counter_found = 1;
3062 } else {
3063 if (ac->clabel->mod_counter > mod_counter) {
3064 mod_counter = ac->clabel->mod_counter;
3065 }
3066 }
3067 ac = ac->next;
3068 }
3069
3070 num_missing = 0;
3071 auto_config = cset->ac;
3072
3073 even_pair_failed = 0;
3074 for(c=0; c<num_cols; c++) {
3075 ac = auto_config;
3076 while(ac!=NULL) {
3077 if ((ac->clabel->column == c) &&
3078 (ac->clabel->mod_counter == mod_counter)) {
3079 /* it's this one... */
3080 #ifdef DEBUG
3081 printf("Found: %s at %d\n",
3082 ac->devname,c);
3083 #endif
3084 break;
3085 }
3086 ac=ac->next;
3087 }
3088 if (ac==NULL) {
3089 /* Didn't find one here! */
3090 /* special case for RAID 1, especially
3091 where there are more than 2
3092 components (where RAIDframe treats
3093 things a little differently :( ) */
3094 if (parity_type == '1') {
3095 if (c%2 == 0) { /* even component */
3096 even_pair_failed = 1;
3097 } else { /* odd component. If
3098 we're failed, and
3099 so is the even
3100 component, it's
3101 "Good Night, Charlie" */
3102 if (even_pair_failed == 1) {
3103 return(0);
3104 }
3105 }
3106 } else {
3107 /* normal accounting */
3108 num_missing++;
3109 }
3110 }
3111 if ((parity_type == '1') && (c%2 == 1)) {
3112 /* Just did an even component, and we didn't
3113 bail.. reset the even_pair_failed flag,
3114 and go on to the next component.... */
3115 even_pair_failed = 0;
3116 }
3117 }
3118
3119 clabel = cset->ac->clabel;
3120
3121 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3122 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3123 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3124 /* XXX this needs to be made *much* more general */
3125 /* Too many failures */
3126 return(0);
3127 }
3128 /* otherwise, all is well, and we've got enough to take a kick
3129 at autoconfiguring this set */
3130 return(1);
3131 }
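/*
 * Example (illustrative): in a 3-column RAID 5 set where one component
 * carries a stale mod_counter, that column is counted as missing, so
 * num_missing == 1; the '5' case above tolerates that and the set is
 * still eligible for (degraded) autoconfiguration.  A second missing
 * column would push num_missing to 2 and the set would be rejected.
 */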
3132
3133 void
3134 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3135 RF_Raid_t *raidPtr)
3136 {
3137 RF_ComponentLabel_t *clabel;
3138 int i;
3139
3140 clabel = ac->clabel;
3141
3142 /* 1. Fill in the common stuff */
3143 config->numRow = clabel->num_rows = 1;
3144 config->numCol = clabel->num_columns;
3145 config->numSpare = 0; /* XXX should this be set here? */
3146 config->sectPerSU = clabel->sectPerSU;
3147 config->SUsPerPU = clabel->SUsPerPU;
3148 config->SUsPerRU = clabel->SUsPerRU;
3149 config->parityConfig = clabel->parityConfig;
3150 /* XXX... */
3151 strcpy(config->diskQueueType,"fifo");
3152 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3153 config->layoutSpecificSize = 0; /* XXX ?? */
3154
3155 while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in rf_reasonable_label() */
3158 strcpy(config->devnames[0][ac->clabel->column],
3159 ac->devname);
3160 ac = ac->next;
3161 }
3162
3163 for(i=0;i<RF_MAXDBGV;i++) {
3164 config->debugVars[i][0] = 0;
3165 }
3166 }
3167
3168 int
3169 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3170 {
3171 RF_ComponentLabel_t clabel;
3172 struct vnode *vp;
3173 dev_t dev;
3174 int column;
3175 int sparecol;
3176
3177 raidPtr->autoconfigure = new_value;
3178
3179 for(column=0; column<raidPtr->numCol; column++) {
3180 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3181 dev = raidPtr->Disks[column].dev;
3182 vp = raidPtr->raid_cinfo[column].ci_vp;
3183 raidread_component_label(dev, vp, &clabel);
3184 clabel.autoconfigure = new_value;
3185 raidwrite_component_label(dev, vp, &clabel);
3186 }
3187 }
3188 for(column = 0; column < raidPtr->numSpare ; column++) {
3189 sparecol = raidPtr->numCol + column;
3190 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3191 dev = raidPtr->Disks[sparecol].dev;
3192 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3193 raidread_component_label(dev, vp, &clabel);
3194 clabel.autoconfigure = new_value;
3195 raidwrite_component_label(dev, vp, &clabel);
3196 }
3197 }
3198 return(new_value);
3199 }
3200
3201 int
3202 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3203 {
3204 RF_ComponentLabel_t clabel;
3205 struct vnode *vp;
3206 dev_t dev;
3207 int column;
3208 int sparecol;
3209
3210 raidPtr->root_partition = new_value;
3211 for(column=0; column<raidPtr->numCol; column++) {
3212 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3213 dev = raidPtr->Disks[column].dev;
3214 vp = raidPtr->raid_cinfo[column].ci_vp;
3215 raidread_component_label(dev, vp, &clabel);
3216 clabel.root_partition = new_value;
3217 raidwrite_component_label(dev, vp, &clabel);
3218 }
3219 }
3220 for(column = 0; column < raidPtr->numSpare ; column++) {
3221 sparecol = raidPtr->numCol + column;
3222 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3223 dev = raidPtr->Disks[sparecol].dev;
3224 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3225 raidread_component_label(dev, vp, &clabel);
3226 clabel.root_partition = new_value;
3227 raidwrite_component_label(dev, vp, &clabel);
3228 }
3229 }
3230 return(new_value);
3231 }
3232
3233 void
3234 rf_release_all_vps(RF_ConfigSet_t *cset)
3235 {
3236 RF_AutoConfig_t *ac;
3237
3238 ac = cset->ac;
3239 while(ac!=NULL) {
3240 /* Close the vp, and give it back */
3241 if (ac->vp) {
3242 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3243 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3244 vput(ac->vp);
3245 ac->vp = NULL;
3246 }
3247 ac = ac->next;
3248 }
3249 }
3250
3251
3252 void
3253 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3254 {
3255 RF_AutoConfig_t *ac;
3256 RF_AutoConfig_t *next_ac;
3257
3258 ac = cset->ac;
3259 while(ac!=NULL) {
3260 next_ac = ac->next;
3261 /* nuke the label */
3262 free(ac->clabel, M_RAIDFRAME);
3263 /* cleanup the config structure */
3264 free(ac, M_RAIDFRAME);
3265 /* "next.." */
3266 ac = next_ac;
3267 }
3268 /* and, finally, nuke the config set */
3269 free(cset, M_RAIDFRAME);
3270 }
3271
3272
3273 void
3274 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3275 {
3276 /* current version number */
3277 clabel->version = RF_COMPONENT_LABEL_VERSION;
3278 clabel->serial_number = raidPtr->serial_number;
3279 clabel->mod_counter = raidPtr->mod_counter;
3280 clabel->num_rows = 1;
3281 clabel->num_columns = raidPtr->numCol;
3282 clabel->clean = RF_RAID_DIRTY; /* not clean */
3283 clabel->status = rf_ds_optimal; /* "It's good!" */
3284
3285 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3286 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3287 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3288
3289 clabel->blockSize = raidPtr->bytesPerSector;
3290 clabel->numBlocks = raidPtr->sectorsPerDisk;
3291
3292 /* XXX not portable */
3293 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3294 clabel->maxOutstanding = raidPtr->maxOutstanding;
3295 clabel->autoconfigure = raidPtr->autoconfigure;
3296 clabel->root_partition = raidPtr->root_partition;
3297 clabel->last_unit = raidPtr->raidid;
3298 clabel->config_order = raidPtr->config_order;
3299 }
3300
3301 int
3302 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3303 {
3304 RF_Raid_t *raidPtr;
3305 RF_Config_t *config;
3306 int raidID;
3307 int retcode;
3308
3309 #ifdef DEBUG
3310 printf("RAID autoconfigure\n");
3311 #endif
3312
3313 retcode = 0;
3314 *unit = -1;
3315
3316 /* 1. Create a config structure */
3317
3318 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3319 M_RAIDFRAME,
3320 M_NOWAIT);
3321 if (config==NULL) {
3322 printf("Out of mem!?!?\n");
3323 /* XXX do something more intelligent here. */
3324 return(1);
3325 }
3326
3327 memset(config, 0, sizeof(RF_Config_t));
3328
3329 /*
3330 2. Figure out what RAID ID this one is supposed to live at
3331 See if we can get the same RAID dev that it was configured
3332 on last time..
3333 */
3334
3335 raidID = cset->ac->clabel->last_unit;
3336 if ((raidID < 0) || (raidID >= numraid)) {
3337 /* let's not wander off into lala land. */
3338 raidID = numraid - 1;
3339 }
3340 if (raidPtrs[raidID]->valid != 0) {
3341
3342 /*
3343 Nope... Go looking for an alternative...
3344 Start high so we don't immediately use raid0 if that's
3345 not taken.
3346 */
3347
3348 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3349 if (raidPtrs[raidID]->valid == 0) {
3350 /* can use this one! */
3351 break;
3352 }
3353 }
3354 }
3355
3356 if (raidID < 0) {
3357 /* punt... */
3358 printf("Unable to auto configure this set!\n");
3359 printf("(Out of RAID devs!)\n");
3360 free(config, M_RAIDFRAME);
3361 return(1);
3362 }
3363
3364 #ifdef DEBUG
3365 printf("Configuring raid%d:\n",raidID);
3366 #endif
3367
3368 raidPtr = raidPtrs[raidID];
3369
3370 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3371 raidPtr->raidid = raidID;
3372 raidPtr->openings = RAIDOUTSTANDING;
3373
3374 /* 3. Build the configuration structure */
3375 rf_create_configuration(cset->ac, config, raidPtr);
3376
3377 /* 4. Do the configuration */
3378 retcode = rf_Configure(raidPtr, config, cset->ac);
3379
3380 if (retcode == 0) {
3381
3382 raidinit(raidPtrs[raidID]);
3383
3384 rf_markalldirty(raidPtrs[raidID]);
3385 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3386 if (cset->ac->clabel->root_partition==1) {
3387 /* everything configured just fine. Make a note
3388 that this set is eligible to be root. */
3389 cset->rootable = 1;
3390 /* XXX do this here? */
3391 raidPtrs[raidID]->root_partition = 1;
3392 }
3393 }
3394
3395 /* 5. Cleanup */
3396 free(config, M_RAIDFRAME);
3397
3398 *unit = raidID;
3399 return(retcode);
3400 }
3401
3402 void
3403 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3404 {
3405 struct buf *bp;
3406
3407 bp = (struct buf *)desc->bp;
3408 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3409 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3410 }
3411
3412 void
3413 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3414 size_t xmin, size_t xmax)
3415 {
3416 pool_init(p, size, 0, 0, 0, w_chan, NULL);
3417 pool_sethiwat(p, xmax);
3418 pool_prime(p, xmin);
3419 pool_setlowat(p, xmin);
3420 }
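/*
 * Typical use (a sketch only -- the pool and structure names here are
 * made up): keep at least `xmin' items primed and cap the pool at
 * `xmax':
 *
 *	rf_pool_init(&rf_example_pool, sizeof(RF_Example_t),
 *	    "rf_examplepl", 5, 50);
 */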
3421
3422 /*
3423 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3424 * if there is IO pending and if that IO could possibly be done for a
3425 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3426 * otherwise.
3427 *
3428 */
3429
3430 int
3431 rf_buf_queue_check(int raidid)
3432 {
3433 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3434 raidPtrs[raidid]->openings > 0) {
3435 /* there is work to do */
3436 return 0;
3437 }
3438 /* default is nothing to do */
3439 return 1;
3440 }
3441
3442 int
3443 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3444 {
3445 struct partinfo dpart;
3446 struct dkwedge_info dkw;
3447 int error;
3448
3449 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3450 if (error == 0) {
3451 diskPtr->blockSize = dpart.disklab->d_secsize;
3452 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3453 diskPtr->partitionSize = dpart.part->p_size;
3454 return 0;
3455 }
3456
3457 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3458 if (error == 0) {
3459 diskPtr->blockSize = 512; /* XXX */
3460 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3461 diskPtr->partitionSize = dkw.dkw_size;
3462 return 0;
3463 }
3464 return error;
3465 }
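/*
 * In both cases above numBlocks is the space usable for data: the
 * first rf_protectedSectors sectors of each component are reserved
 * for RAIDframe metadata (the component label region described near
 * RF_COMPONENT_INFO_OFFSET above).
 */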
3466
3467 static int
3468 raid_match(struct device *self, struct cfdata *cfdata,
3469 void *aux)
3470 {
3471 return 1;
3472 }
3473
3474 static void
3475 raid_attach(struct device *parent, struct device *self,
3476 void *aux)
3477 {
3478
3479 }
3480
3481
3482 static int
3483 raid_detach(struct device *self, int flags)
3484 {
3485 struct raid_softc *rs = (struct raid_softc *)self;
3486
3487 if (rs->sc_flags & RAIDF_INITED)
3488 return EBUSY;
3489
3490 return 0;
3491 }
3492
3493
3494