rf_netbsdkintf.c revision 1.226.2.2 1 /* $NetBSD: rf_netbsdkintf.c,v 1.226.2.2 2007/06/09 23:57:56 ad Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * from: Utah $Hdr: cd.c 1.6 90/11/28$
71 *
72 * @(#)cd.c 8.2 (Berkeley) 11/16/93
73 */
74
75 /*
76 * Copyright (c) 1988 University of Utah.
77 *
78 * This code is derived from software contributed to Berkeley by
79 * the Systems Programming Group of the University of Utah Computer
80 * Science Department.
81 *
82 * Redistribution and use in source and binary forms, with or without
83 * modification, are permitted provided that the following conditions
84 * are met:
85 * 1. Redistributions of source code must retain the above copyright
86 * notice, this list of conditions and the following disclaimer.
87 * 2. Redistributions in binary form must reproduce the above copyright
88 * notice, this list of conditions and the following disclaimer in the
89 * documentation and/or other materials provided with the distribution.
90 * 3. All advertising materials mentioning features or use of this software
91 * must display the following acknowledgement:
92 * This product includes software developed by the University of
93 * California, Berkeley and its contributors.
94 * 4. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission.
97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE.
109 *
110 * from: Utah $Hdr: cd.c 1.6 90/11/28$
111 *
112 * @(#)cd.c 8.2 (Berkeley) 11/16/93
113 */
114
115 /*
116 * Copyright (c) 1995 Carnegie-Mellon University.
117 * All rights reserved.
118 *
119 * Authors: Mark Holland, Jim Zelenka
120 *
121 * Permission to use, copy, modify and distribute this software and
122 * its documentation is hereby granted, provided that both the copyright
123 * notice and this permission notice appear in all copies of the
124 * software, derivative works or modified versions, and any portions
125 * thereof, and that both notices appear in supporting documentation.
126 *
127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130 *
131 * Carnegie Mellon requests users of this software to return to
132 *
133 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
134 * School of Computer Science
135 * Carnegie Mellon University
136 * Pittsburgh PA 15213-3890
137 *
138 * any improvements or extensions that they make and grant Carnegie the
139 * rights to redistribute these changes.
140 */
141
142 /***********************************************************
143 *
144 * rf_kintf.c -- the kernel interface routines for RAIDframe
145 *
146 ***********************************************************/
147
148 #include <sys/cdefs.h>
149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.226.2.2 2007/06/09 23:57:56 ad Exp $");
150
151 #include <sys/param.h>
152 #include <sys/errno.h>
153 #include <sys/pool.h>
154 #include <sys/proc.h>
155 #include <sys/queue.h>
156 #include <sys/disk.h>
157 #include <sys/device.h>
158 #include <sys/stat.h>
159 #include <sys/ioctl.h>
160 #include <sys/fcntl.h>
161 #include <sys/systm.h>
162 #include <sys/namei.h>
163 #include <sys/vnode.h>
164 #include <sys/disklabel.h>
165 #include <sys/conf.h>
166 #include <sys/lock.h>
167 #include <sys/buf.h>
168 #include <sys/bufq.h>
169 #include <sys/user.h>
170 #include <sys/reboot.h>
171 #include <sys/kauth.h>
172
173 #include <dev/raidframe/raidframevar.h>
174 #include <dev/raidframe/raidframeio.h>
175 #include "raid.h"
176 #include "opt_raid_autoconfig.h"
177 #include "rf_raid.h"
178 #include "rf_copyback.h"
179 #include "rf_dag.h"
180 #include "rf_dagflags.h"
181 #include "rf_desc.h"
182 #include "rf_diskqueue.h"
183 #include "rf_etimer.h"
184 #include "rf_general.h"
185 #include "rf_kintf.h"
186 #include "rf_options.h"
187 #include "rf_driver.h"
188 #include "rf_parityscan.h"
189 #include "rf_threadstuff.h"
190
/*
 * db1_printf((fmt, ...)) -- level-1 debug output.
 *
 * The argument is a fully parenthesised printf argument list.  With DEBUG
 * defined the message is printed when rf_kdebug_level is greater than zero;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement: it can be used as the body of an unbraced if/else
 * without capturing the caller's "else" (DEBUG case) or producing a stray
 * empty statement that detaches the "else" (non-DEBUG case).
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
197
198 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
199
200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
201
202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
203 * spare table */
204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
205 * installation process */
206
207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
208
209 /* prototypes */
210 static void KernelWakeupFunc(struct buf *);
211 static void InitBP(struct buf *, struct vnode *, unsigned,
212 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
213 void *, int, struct proc *);
214 static void raidinit(RF_Raid_t *);
215
216 void raidattach(int);
217 static int raid_match(struct device *, struct cfdata *, void *);
218 static void raid_attach(struct device *, struct device *, void *);
219 static int raid_detach(struct device *, int);
220
221 dev_type_open(raidopen);
222 dev_type_close(raidclose);
223 dev_type_read(raidread);
224 dev_type_write(raidwrite);
225 dev_type_ioctl(raidioctl);
226 dev_type_strategy(raidstrategy);
227 dev_type_dump(raiddump);
228 dev_type_size(raidsize);
229
230 const struct bdevsw raid_bdevsw = {
231 raidopen, raidclose, raidstrategy, raidioctl,
232 raiddump, raidsize, D_DISK
233 };
234
235 const struct cdevsw raid_cdevsw = {
236 raidopen, raidclose, raidread, raidwrite, raidioctl,
237 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
238 };
239
240 /* XXX Not sure if the following should be replacing the raidPtrs above,
241 or if it should be used in conjunction with that...
242 */
243
244 struct raid_softc {
245 struct device *sc_dev;
246 int sc_flags; /* flags */
247 int sc_cflags; /* configuration flags */
248 uint64_t sc_size; /* size of the raid device */
249 char sc_xname[20]; /* XXX external name */
250 struct disk sc_dkdev; /* generic disk device info */
251 struct bufq_state *buf_queue; /* used for the device queue */
252 };
253 /* sc_flags */
254 #define RAIDF_INITED 0x01 /* unit has been initialized */
255 #define RAIDF_WLABEL 0x02 /* label area is writable */
256 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
257 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
258 #define RAIDF_LOCKED 0x80 /* unit is locked */
259
260 #define raidunit(x) DISKUNIT(x)
261 int numraid = 0;
262
263 extern struct cfdriver raid_cd;
264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
265 raid_match, raid_attach, raid_detach, NULL);
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
295 struct disklabel *);
296 static void raidgetdisklabel(dev_t);
297 static void raidmakedisklabel(struct raid_softc *);
298
299 static int raidlock(struct raid_softc *);
300 static void raidunlock(struct raid_softc *);
301
302 static void rf_markalldirty(RF_Raid_t *);
303
304 void rf_ReconThread(struct rf_recon_req *);
305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
306 void rf_CopybackThread(RF_Raid_t *raidPtr);
307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
308 int rf_autoconfig(struct device *self);
309 void rf_buildroothack(RF_ConfigSet_t *);
310
311 RF_AutoConfig_t *rf_find_raid_components(void);
312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
314 static int rf_reasonable_label(RF_ComponentLabel_t *);
315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
316 int rf_set_autoconfig(RF_Raid_t *, int);
317 int rf_set_rootpartition(RF_Raid_t *, int);
318 void rf_release_all_vps(RF_ConfigSet_t *);
319 void rf_cleanup_config_set(RF_ConfigSet_t *);
320 int rf_have_enough_components(RF_ConfigSet_t *);
321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
322
323 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
324 allow autoconfig to take place.
325 Note that this is overridden by having
326 RAID_AUTOCONFIG as an option in the
327 kernel config file. */
328
329 struct RF_Pools_s rf_pools;
330
331 void
332 raidattach(int num)
333 {
334 int raidID;
335 int i, rc;
336
337 #ifdef DEBUG
338 printf("raidattach: Asked for %d units\n", num);
339 #endif
340
341 if (num <= 0) {
342 #ifdef DIAGNOSTIC
343 panic("raidattach: count <= 0");
344 #endif
345 return;
346 }
347 /* This is where all the initialization stuff gets done. */
348
349 numraid = num;
350
351 /* Make some space for requested number of units... */
352
353 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
354 if (raidPtrs == NULL) {
355 panic("raidPtrs is NULL!!");
356 }
357
358 rf_mutex_init(&rf_sparet_wait_mutex);
359
360 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
361
362 for (i = 0; i < num; i++)
363 raidPtrs[i] = NULL;
364 rc = rf_BootRaidframe();
365 if (rc == 0)
366 printf("Kernelized RAIDframe activated\n");
367 else
368 panic("Serious error booting RAID!!");
369
370 /* put together some datastructures like the CCD device does.. This
371 * lets us lock the device and what-not when it gets opened. */
372
373 raid_softc = (struct raid_softc *)
374 malloc(num * sizeof(struct raid_softc),
375 M_RAIDFRAME, M_NOWAIT);
376 if (raid_softc == NULL) {
377 printf("WARNING: no memory for RAIDframe driver\n");
378 return;
379 }
380
381 memset(raid_softc, 0, num * sizeof(struct raid_softc));
382
383 for (raidID = 0; raidID < num; raidID++) {
384 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
385
386 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
387 (RF_Raid_t *));
388 if (raidPtrs[raidID] == NULL) {
389 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
390 numraid = raidID;
391 return;
392 }
393 }
394
395 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
396 printf("config_cfattach_attach failed?\n");
397 }
398
399 #ifdef RAID_AUTOCONFIG
400 raidautoconfig = 1;
401 #endif
402
403 /*
404 * Register a finalizer which will be used to auto-config RAID
405 * sets once all real hardware devices have been found.
406 */
407 if (config_finalize_register(NULL, rf_autoconfig) != 0)
408 printf("WARNING: unable to register RAIDframe finalizer\n");
409 }
410
411 int
412 rf_autoconfig(struct device *self)
413 {
414 RF_AutoConfig_t *ac_list;
415 RF_ConfigSet_t *config_sets;
416 int i;
417
418 if (raidautoconfig == 0)
419 return (0);
420
421 /* XXX This code can only be run once. */
422 raidautoconfig = 0;
423
424 /* 1. locate all RAID components on the system */
425 #ifdef DEBUG
426 printf("Searching for RAID components...\n");
427 #endif
428 ac_list = rf_find_raid_components();
429
430 /* 2. Sort them into their respective sets. */
431 config_sets = rf_create_auto_sets(ac_list);
432
433 /*
434 * 3. Evaluate each set andconfigure the valid ones.
435 * This gets done in rf_buildroothack().
436 */
437 rf_buildroothack(config_sets);
438
439 for (i = 0; i < numraid; i++)
440 if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
441 dkwedge_discover(&raid_softc[i].sc_dkdev);
442
443 return 1;
444 }
445
446 void
447 rf_buildroothack(RF_ConfigSet_t *config_sets)
448 {
449 RF_ConfigSet_t *cset;
450 RF_ConfigSet_t *next_cset;
451 int retcode;
452 int raidID;
453 int rootID;
454 int col;
455 int num_root;
456 char *devname;
457
458 rootID = 0;
459 num_root = 0;
460 cset = config_sets;
461 while(cset != NULL ) {
462 next_cset = cset->next;
463 if (rf_have_enough_components(cset) &&
464 cset->ac->clabel->autoconfigure==1) {
465 retcode = rf_auto_config_set(cset,&raidID);
466 if (!retcode) {
467 #ifdef DEBUG
468 printf("raid%d: configured ok\n", raidID);
469 #endif
470 if (cset->rootable) {
471 rootID = raidID;
472 num_root++;
473 }
474 } else {
475 /* The autoconfig didn't work :( */
476 #ifdef DEBUG
477 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
478 #endif
479 rf_release_all_vps(cset);
480 }
481 } else {
482 #ifdef DEBUG
483 printf("raid%d: not enough components\n", raidID);
484 #endif
485 /* we're not autoconfiguring this set...
486 release the associated resources */
487 rf_release_all_vps(cset);
488 }
489 /* cleanup */
490 rf_cleanup_config_set(cset);
491 cset = next_cset;
492 }
493
494 /* if the user has specified what the root device should be
495 then we don't touch booted_device or boothowto... */
496
497 if (rootspec != NULL)
498 return;
499
500 /* we found something bootable... */
501
502 if (num_root == 1) {
503 booted_device = raid_softc[rootID].sc_dev;
504 } else if (num_root > 1) {
505
506 /*
507 * Maybe the MD code can help. If it cannot, then
508 * setroot() will discover that we have no
509 * booted_device and will ask the user if nothing was
510 * hardwired in the kernel config file
511 */
512
513 if (booted_device == NULL)
514 cpu_rootconf();
515 if (booted_device == NULL)
516 return;
517
518 num_root = 0;
519 for (raidID = 0; raidID < numraid; raidID++) {
520 if (raidPtrs[raidID]->valid == 0)
521 continue;
522
523 if (raidPtrs[raidID]->root_partition == 0)
524 continue;
525
526 for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
527 devname = raidPtrs[raidID]->Disks[col].devname;
528 devname += sizeof("/dev/") - 1;
529 if (strncmp(devname, booted_device->dv_xname,
530 strlen(booted_device->dv_xname)) != 0)
531 continue;
532 #ifdef DEBUG
533 printf("raid%d includes boot device %s\n",
534 raidID, devname);
535 #endif
536 num_root++;
537 rootID = raidID;
538 }
539 }
540
541 if (num_root == 1) {
542 booted_device = raid_softc[rootID].sc_dev;
543 } else {
544 /* we can't guess.. require the user to answer... */
545 boothowto |= RB_ASKNAME;
546 }
547 }
548 }
549
550
551 int
552 raidsize(dev_t dev)
553 {
554 struct raid_softc *rs;
555 struct disklabel *lp;
556 int part, unit, omask, size;
557
558 unit = raidunit(dev);
559 if (unit >= numraid)
560 return (-1);
561 rs = &raid_softc[unit];
562
563 if ((rs->sc_flags & RAIDF_INITED) == 0)
564 return (-1);
565
566 part = DISKPART(dev);
567 omask = rs->sc_dkdev.dk_openmask & (1 << part);
568 lp = rs->sc_dkdev.dk_label;
569
570 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
571 return (-1);
572
573 if (lp->d_partitions[part].p_fstype != FS_SWAP)
574 size = -1;
575 else
576 size = lp->d_partitions[part].p_size *
577 (lp->d_secsize / DEV_BSIZE);
578
579 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
580 return (-1);
581
582 return (size);
583
584 }
585
586 int
587 raiddump(dev_t dev, daddr_t blkno, void *va,
588 size_t size)
589 {
590 /* Not implemented. */
591 return ENXIO;
592 }
593 /* ARGSUSED */
594 int
595 raidopen(dev_t dev, int flags, int fmt,
596 struct lwp *l)
597 {
598 int unit = raidunit(dev);
599 struct raid_softc *rs;
600 struct disklabel *lp;
601 int part, pmask;
602 int error = 0;
603
604 if (unit >= numraid)
605 return (ENXIO);
606 rs = &raid_softc[unit];
607
608 if ((error = raidlock(rs)) != 0)
609 return (error);
610 lp = rs->sc_dkdev.dk_label;
611
612 part = DISKPART(dev);
613
614 /*
615 * If there are wedges, and this is not RAW_PART, then we
616 * need to fail.
617 */
618 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
619 error = EBUSY;
620 goto bad;
621 }
622 pmask = (1 << part);
623
624 if ((rs->sc_flags & RAIDF_INITED) &&
625 (rs->sc_dkdev.dk_openmask == 0))
626 raidgetdisklabel(dev);
627
628 /* make sure that this partition exists */
629
630 if (part != RAW_PART) {
631 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
632 ((part >= lp->d_npartitions) ||
633 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
634 error = ENXIO;
635 goto bad;
636 }
637 }
638 /* Prevent this unit from being unconfigured while open. */
639 switch (fmt) {
640 case S_IFCHR:
641 rs->sc_dkdev.dk_copenmask |= pmask;
642 break;
643
644 case S_IFBLK:
645 rs->sc_dkdev.dk_bopenmask |= pmask;
646 break;
647 }
648
649 if ((rs->sc_dkdev.dk_openmask == 0) &&
650 ((rs->sc_flags & RAIDF_INITED) != 0)) {
651 /* First one... mark things as dirty... Note that we *MUST*
652 have done a configure before this. I DO NOT WANT TO BE
653 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
654 THAT THEY BELONG TOGETHER!!!!! */
655 /* XXX should check to see if we're only open for reading
656 here... If so, we needn't do this, but then need some
657 other way of keeping track of what's happened.. */
658
659 rf_markalldirty( raidPtrs[unit] );
660 }
661
662
663 rs->sc_dkdev.dk_openmask =
664 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
665
666 bad:
667 raidunlock(rs);
668
669 return (error);
670
671
672 }
673 /* ARGSUSED */
674 int
675 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
676 {
677 int unit = raidunit(dev);
678 struct cfdata *cf;
679 struct raid_softc *rs;
680 int error = 0;
681 int part;
682
683 if (unit >= numraid)
684 return (ENXIO);
685 rs = &raid_softc[unit];
686
687 if ((error = raidlock(rs)) != 0)
688 return (error);
689
690 part = DISKPART(dev);
691
692 /* ...that much closer to allowing unconfiguration... */
693 switch (fmt) {
694 case S_IFCHR:
695 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
696 break;
697
698 case S_IFBLK:
699 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
700 break;
701 }
702 rs->sc_dkdev.dk_openmask =
703 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
704
705 if ((rs->sc_dkdev.dk_openmask == 0) &&
706 ((rs->sc_flags & RAIDF_INITED) != 0)) {
707 /* Last one... device is not unconfigured yet.
708 Device shutdown has taken care of setting the
709 clean bits if RAIDF_INITED is not set
710 mark things as clean... */
711
712 rf_update_component_labels(raidPtrs[unit],
713 RF_FINAL_COMPONENT_UPDATE);
714 if (doing_shutdown) {
715 /* last one, and we're going down, so
716 lights out for this RAID set too. */
717 error = rf_Shutdown(raidPtrs[unit]);
718
719 /* It's no longer initialized... */
720 rs->sc_flags &= ~RAIDF_INITED;
721
722 /* detach the device */
723
724 cf = device_cfdata(rs->sc_dev);
725 error = config_detach(rs->sc_dev, DETACH_QUIET);
726 free(cf, M_RAIDFRAME);
727
728 /* Detach the disk. */
729 pseudo_disk_detach(&rs->sc_dkdev);
730 }
731 }
732
733 raidunlock(rs);
734 return (0);
735
736 }
737
738 void
739 raidstrategy(struct buf *bp)
740 {
741 int s, error = 0;
742
743 unsigned int raidID = raidunit(bp->b_dev);
744 RF_Raid_t *raidPtr;
745 struct raid_softc *rs = &raid_softc[raidID];
746 int wlabel;
747
748 if ((rs->sc_flags & RAIDF_INITED) ==0) {
749 error = ENXIO;
750 goto done;
751 }
752 if (raidID >= numraid || !raidPtrs[raidID]) {
753 error = ENODEV;
754 goto done;
755 }
756 raidPtr = raidPtrs[raidID];
757 if (!raidPtr->valid) {
758 error = ENODEV;
759 goto done;
760 }
761 if (bp->b_bcount == 0) {
762 db1_printf(("b_bcount is zero..\n"));
763 goto done;
764 }
765
766 /*
767 * Do bounds checking and adjust transfer. If there's an
768 * error, the bounds check will flag that for us.
769 */
770
771 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
772 if (DISKPART(bp->b_dev) == RAW_PART) {
773 uint64_t size; /* device size in DEV_BSIZE unit */
774
775 if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
776 size = raidPtr->totalSectors <<
777 (raidPtr->logBytesPerSector - DEV_BSHIFT);
778 } else {
779 size = raidPtr->totalSectors >>
780 (DEV_BSHIFT - raidPtr->logBytesPerSector);
781 }
782 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
783 goto done;
784 }
785 } else {
786 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
787 db1_printf(("Bounds check failed!!:%d %d\n",
788 (int) bp->b_blkno, (int) wlabel));
789 goto done;
790 }
791 }
792 s = splbio();
793
794 bp->b_resid = 0;
795
796 /* stuff it onto our queue */
797 BUFQ_PUT(rs->buf_queue, bp);
798
799 /* scheduled the IO to happen at the next convenient time */
800 wakeup(&(raidPtrs[raidID]->iodone));
801
802 splx(s);
803 return;
804
805 done:
806 biodone(bp, error, bp->b_bcount);
807 }
808 /* ARGSUSED */
809 int
810 raidread(dev_t dev, struct uio *uio, int flags)
811 {
812 int unit = raidunit(dev);
813 struct raid_softc *rs;
814
815 if (unit >= numraid)
816 return (ENXIO);
817 rs = &raid_softc[unit];
818
819 if ((rs->sc_flags & RAIDF_INITED) == 0)
820 return (ENXIO);
821
822 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
823
824 }
825 /* ARGSUSED */
826 int
827 raidwrite(dev_t dev, struct uio *uio, int flags)
828 {
829 int unit = raidunit(dev);
830 struct raid_softc *rs;
831
832 if (unit >= numraid)
833 return (ENXIO);
834 rs = &raid_softc[unit];
835
836 if ((rs->sc_flags & RAIDF_INITED) == 0)
837 return (ENXIO);
838
839 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
840
841 }
842
843 int
844 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
845 {
846 int unit = raidunit(dev);
847 int error = 0;
848 int part, pmask;
849 struct cfdata *cf;
850 struct raid_softc *rs;
851 RF_Config_t *k_cfg, *u_cfg;
852 RF_Raid_t *raidPtr;
853 RF_RaidDisk_t *diskPtr;
854 RF_AccTotals_t *totals;
855 RF_DeviceConfig_t *d_cfg, **ucfgp;
856 u_char *specific_buf;
857 int retcode = 0;
858 int column;
859 int raidid;
860 struct rf_recon_req *rrcopy, *rr;
861 RF_ComponentLabel_t *clabel;
862 RF_ComponentLabel_t *ci_label;
863 RF_ComponentLabel_t **clabel_ptr;
864 RF_SingleComponent_t *sparePtr,*componentPtr;
865 RF_SingleComponent_t component;
866 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
867 int i, j, d;
868 #ifdef __HAVE_OLD_DISKLABEL
869 struct disklabel newlabel;
870 #endif
871 struct dkwedge_info *dkw;
872
873 if (unit >= numraid)
874 return (ENXIO);
875 rs = &raid_softc[unit];
876 raidPtr = raidPtrs[unit];
877
878 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
879 (int) DISKPART(dev), (int) unit, (int) cmd));
880
881 /* Must be open for writes for these commands... */
882 switch (cmd) {
883 #ifdef DIOCGSECTORSIZE
884 case DIOCGSECTORSIZE:
885 *(u_int *)data = raidPtr->bytesPerSector;
886 return 0;
887 case DIOCGMEDIASIZE:
888 *(off_t *)data =
889 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
890 return 0;
891 #endif
892 case DIOCSDINFO:
893 case DIOCWDINFO:
894 #ifdef __HAVE_OLD_DISKLABEL
895 case ODIOCWDINFO:
896 case ODIOCSDINFO:
897 #endif
898 case DIOCWLABEL:
899 case DIOCAWEDGE:
900 case DIOCDWEDGE:
901 if ((flag & FWRITE) == 0)
902 return (EBADF);
903 }
904
905 /* Must be initialized for these... */
906 switch (cmd) {
907 case DIOCGDINFO:
908 case DIOCSDINFO:
909 case DIOCWDINFO:
910 #ifdef __HAVE_OLD_DISKLABEL
911 case ODIOCGDINFO:
912 case ODIOCWDINFO:
913 case ODIOCSDINFO:
914 case ODIOCGDEFLABEL:
915 #endif
916 case DIOCGPART:
917 case DIOCWLABEL:
918 case DIOCGDEFLABEL:
919 case DIOCAWEDGE:
920 case DIOCDWEDGE:
921 case DIOCLWEDGES:
922 case RAIDFRAME_SHUTDOWN:
923 case RAIDFRAME_REWRITEPARITY:
924 case RAIDFRAME_GET_INFO:
925 case RAIDFRAME_RESET_ACCTOTALS:
926 case RAIDFRAME_GET_ACCTOTALS:
927 case RAIDFRAME_KEEP_ACCTOTALS:
928 case RAIDFRAME_GET_SIZE:
929 case RAIDFRAME_FAIL_DISK:
930 case RAIDFRAME_COPYBACK:
931 case RAIDFRAME_CHECK_RECON_STATUS:
932 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
933 case RAIDFRAME_GET_COMPONENT_LABEL:
934 case RAIDFRAME_SET_COMPONENT_LABEL:
935 case RAIDFRAME_ADD_HOT_SPARE:
936 case RAIDFRAME_REMOVE_HOT_SPARE:
937 case RAIDFRAME_INIT_LABELS:
938 case RAIDFRAME_REBUILD_IN_PLACE:
939 case RAIDFRAME_CHECK_PARITY:
940 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
941 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
942 case RAIDFRAME_CHECK_COPYBACK_STATUS:
943 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
944 case RAIDFRAME_SET_AUTOCONFIG:
945 case RAIDFRAME_SET_ROOT:
946 case RAIDFRAME_DELETE_COMPONENT:
947 case RAIDFRAME_INCORPORATE_HOT_SPARE:
948 if ((rs->sc_flags & RAIDF_INITED) == 0)
949 return (ENXIO);
950 }
951
952 switch (cmd) {
953
954 /* configure the system */
955 case RAIDFRAME_CONFIGURE:
956
957 if (raidPtr->valid) {
958 /* There is a valid RAID set running on this unit! */
959 printf("raid%d: Device already configured!\n",unit);
960 return(EINVAL);
961 }
962
963 /* copy-in the configuration information */
964 /* data points to a pointer to the configuration structure */
965
966 u_cfg = *((RF_Config_t **) data);
967 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
968 if (k_cfg == NULL) {
969 return (ENOMEM);
970 }
971 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
972 if (retcode) {
973 RF_Free(k_cfg, sizeof(RF_Config_t));
974 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
975 retcode));
976 return (retcode);
977 }
978 /* allocate a buffer for the layout-specific data, and copy it
979 * in */
980 if (k_cfg->layoutSpecificSize) {
981 if (k_cfg->layoutSpecificSize > 10000) {
982 /* sanity check */
983 RF_Free(k_cfg, sizeof(RF_Config_t));
984 return (EINVAL);
985 }
986 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
987 (u_char *));
988 if (specific_buf == NULL) {
989 RF_Free(k_cfg, sizeof(RF_Config_t));
990 return (ENOMEM);
991 }
992 retcode = copyin(k_cfg->layoutSpecific, specific_buf,
993 k_cfg->layoutSpecificSize);
994 if (retcode) {
995 RF_Free(k_cfg, sizeof(RF_Config_t));
996 RF_Free(specific_buf,
997 k_cfg->layoutSpecificSize);
998 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
999 retcode));
1000 return (retcode);
1001 }
1002 } else
1003 specific_buf = NULL;
1004 k_cfg->layoutSpecific = specific_buf;
1005
1006 /* should do some kind of sanity check on the configuration.
1007 * Store the sum of all the bytes in the last byte? */
1008
1009 /* configure the system */
1010
1011 /*
1012 * Clear the entire RAID descriptor, just to make sure
1013 * there is no stale data left in the case of a
1014 * reconfiguration
1015 */
1016 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1017 raidPtr->raidid = unit;
1018
1019 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1020
1021 if (retcode == 0) {
1022
1023 /* allow this many simultaneous IO's to
1024 this RAID device */
1025 raidPtr->openings = RAIDOUTSTANDING;
1026
1027 raidinit(raidPtr);
1028 rf_markalldirty(raidPtr);
1029 }
1030 /* free the buffers. No return code here. */
1031 if (k_cfg->layoutSpecificSize) {
1032 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1033 }
1034 RF_Free(k_cfg, sizeof(RF_Config_t));
1035
1036 return (retcode);
1037
1038 /* shutdown the system */
1039 case RAIDFRAME_SHUTDOWN:
1040
1041 if ((error = raidlock(rs)) != 0)
1042 return (error);
1043
1044 /*
1045 * If somebody has a partition mounted, we shouldn't
1046 * shutdown.
1047 */
1048
1049 part = DISKPART(dev);
1050 pmask = (1 << part);
1051 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1052 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1053 (rs->sc_dkdev.dk_copenmask & pmask))) {
1054 raidunlock(rs);
1055 return (EBUSY);
1056 }
1057
1058 retcode = rf_Shutdown(raidPtr);
1059
1060 /* It's no longer initialized... */
1061 rs->sc_flags &= ~RAIDF_INITED;
1062
1063 /* free the pseudo device attach bits */
1064
1065 cf = device_cfdata(rs->sc_dev);
1066 /* XXX this causes us to not return any errors
1067 from the above call to rf_Shutdown() */
1068 retcode = config_detach(rs->sc_dev, DETACH_QUIET);
1069 free(cf, M_RAIDFRAME);
1070
1071 /* Detach the disk. */
1072 pseudo_disk_detach(&rs->sc_dkdev);
1073
1074 raidunlock(rs);
1075
1076 return (retcode);
1077 case RAIDFRAME_GET_COMPONENT_LABEL:
1078 clabel_ptr = (RF_ComponentLabel_t **) data;
1079 /* need to read the component label for the disk indicated
1080 by row,column in clabel */
1081
1082 /* For practice, let's get it directly fromdisk, rather
1083 than from the in-core copy */
1084 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1085 (RF_ComponentLabel_t *));
1086 if (clabel == NULL)
1087 return (ENOMEM);
1088
1089 retcode = copyin( *clabel_ptr, clabel,
1090 sizeof(RF_ComponentLabel_t));
1091
1092 if (retcode) {
1093 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1094 return(retcode);
1095 }
1096
1097 clabel->row = 0; /* Don't allow looking at anything else.*/
1098
1099 column = clabel->column;
1100
1101 if ((column < 0) || (column >= raidPtr->numCol +
1102 raidPtr->numSpare)) {
1103 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1104 return(EINVAL);
1105 }
1106
1107 retcode = raidread_component_label(raidPtr->Disks[column].dev,
1108 raidPtr->raid_cinfo[column].ci_vp,
1109 clabel );
1110
1111 if (retcode == 0) {
1112 retcode = copyout(clabel, *clabel_ptr,
1113 sizeof(RF_ComponentLabel_t));
1114 }
1115 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1116 return (retcode);
1117
1118 case RAIDFRAME_SET_COMPONENT_LABEL:
1119 clabel = (RF_ComponentLabel_t *) data;
1120
1121 /* XXX check the label for valid stuff... */
1122 /* Note that some things *should not* get modified --
1123 the user should be re-initing the labels instead of
1124 trying to patch things.
1125 */
1126
1127 raidid = raidPtr->raidid;
1128 #ifdef DEBUG
1129 printf("raid%d: Got component label:\n", raidid);
1130 printf("raid%d: Version: %d\n", raidid, clabel->version);
1131 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1132 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1133 printf("raid%d: Column: %d\n", raidid, clabel->column);
1134 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1135 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1136 printf("raid%d: Status: %d\n", raidid, clabel->status);
1137 #endif
1138 clabel->row = 0;
1139 column = clabel->column;
1140
1141 if ((column < 0) || (column >= raidPtr->numCol)) {
1142 return(EINVAL);
1143 }
1144
1145 /* XXX this isn't allowed to do anything for now :-) */
1146
1147 /* XXX and before it is, we need to fill in the rest
1148 of the fields!?!?!?! */
1149 #if 0
1150 raidwrite_component_label(
1151 raidPtr->Disks[column].dev,
1152 raidPtr->raid_cinfo[column].ci_vp,
1153 clabel );
1154 #endif
1155 return (0);
1156
1157 case RAIDFRAME_INIT_LABELS:
1158 clabel = (RF_ComponentLabel_t *) data;
1159 /*
1160 we only want the serial number from
1161 the above. We get all the rest of the information
1162 from the config that was used to create this RAID
1163 set.
1164 */
1165
1166 raidPtr->serial_number = clabel->serial_number;
1167
1168 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
1169 (RF_ComponentLabel_t *));
1170 if (ci_label == NULL)
1171 return (ENOMEM);
1172
1173 raid_init_component_label(raidPtr, ci_label);
1174 ci_label->serial_number = clabel->serial_number;
1175 ci_label->row = 0; /* we dont' pretend to support more */
1176
1177 for(column=0;column<raidPtr->numCol;column++) {
1178 diskPtr = &raidPtr->Disks[column];
1179 if (!RF_DEAD_DISK(diskPtr->status)) {
1180 ci_label->partitionSize = diskPtr->partitionSize;
1181 ci_label->column = column;
1182 raidwrite_component_label(
1183 raidPtr->Disks[column].dev,
1184 raidPtr->raid_cinfo[column].ci_vp,
1185 ci_label );
1186 }
1187 }
1188 RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
1189
1190 return (retcode);
1191 case RAIDFRAME_SET_AUTOCONFIG:
1192 d = rf_set_autoconfig(raidPtr, *(int *) data);
1193 printf("raid%d: New autoconfig value is: %d\n",
1194 raidPtr->raidid, d);
1195 *(int *) data = d;
1196 return (retcode);
1197
1198 case RAIDFRAME_SET_ROOT:
1199 d = rf_set_rootpartition(raidPtr, *(int *) data);
1200 printf("raid%d: New rootpartition value is: %d\n",
1201 raidPtr->raidid, d);
1202 *(int *) data = d;
1203 return (retcode);
1204
1205 /* initialize all parity */
1206 case RAIDFRAME_REWRITEPARITY:
1207
1208 if (raidPtr->Layout.map->faultsTolerated == 0) {
1209 /* Parity for RAID 0 is trivially correct */
1210 raidPtr->parity_good = RF_RAID_CLEAN;
1211 return(0);
1212 }
1213
1214 if (raidPtr->parity_rewrite_in_progress == 1) {
1215 /* Re-write is already in progress! */
1216 return(EINVAL);
1217 }
1218
1219 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1220 rf_RewriteParityThread,
1221 raidPtr,"raid_parity");
1222 return (retcode);
1223
1224
1225 case RAIDFRAME_ADD_HOT_SPARE:
1226 sparePtr = (RF_SingleComponent_t *) data;
1227 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1228 retcode = rf_add_hot_spare(raidPtr, &component);
1229 return(retcode);
1230
1231 case RAIDFRAME_REMOVE_HOT_SPARE:
1232 return(retcode);
1233
1234 case RAIDFRAME_DELETE_COMPONENT:
1235 componentPtr = (RF_SingleComponent_t *)data;
1236 memcpy( &component, componentPtr,
1237 sizeof(RF_SingleComponent_t));
1238 retcode = rf_delete_component(raidPtr, &component);
1239 return(retcode);
1240
1241 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1242 componentPtr = (RF_SingleComponent_t *)data;
1243 memcpy( &component, componentPtr,
1244 sizeof(RF_SingleComponent_t));
1245 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1246 return(retcode);
1247
1248 case RAIDFRAME_REBUILD_IN_PLACE:
1249
1250 if (raidPtr->Layout.map->faultsTolerated == 0) {
1251 /* Can't do this on a RAID 0!! */
1252 return(EINVAL);
1253 }
1254
1255 if (raidPtr->recon_in_progress == 1) {
1256 /* a reconstruct is already in progress! */
1257 return(EINVAL);
1258 }
1259
1260 componentPtr = (RF_SingleComponent_t *) data;
1261 memcpy( &component, componentPtr,
1262 sizeof(RF_SingleComponent_t));
1263 component.row = 0; /* we don't support any more */
1264 column = component.column;
1265
1266 if ((column < 0) || (column >= raidPtr->numCol)) {
1267 return(EINVAL);
1268 }
1269
1270 RF_LOCK_MUTEX(raidPtr->mutex);
1271 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1272 (raidPtr->numFailures > 0)) {
1273 /* XXX 0 above shouldn't be constant!!! */
1274 /* some component other than this has failed.
1275 Let's not make things worse than they already
1276 are... */
1277 printf("raid%d: Unable to reconstruct to disk at:\n",
1278 raidPtr->raidid);
1279 printf("raid%d: Col: %d Too many failures.\n",
1280 raidPtr->raidid, column);
1281 RF_UNLOCK_MUTEX(raidPtr->mutex);
1282 return (EINVAL);
1283 }
1284 if (raidPtr->Disks[column].status ==
1285 rf_ds_reconstructing) {
1286 printf("raid%d: Unable to reconstruct to disk at:\n",
1287 raidPtr->raidid);
1288 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1289
1290 RF_UNLOCK_MUTEX(raidPtr->mutex);
1291 return (EINVAL);
1292 }
1293 if (raidPtr->Disks[column].status == rf_ds_spared) {
1294 RF_UNLOCK_MUTEX(raidPtr->mutex);
1295 return (EINVAL);
1296 }
1297 RF_UNLOCK_MUTEX(raidPtr->mutex);
1298
1299 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1300 if (rrcopy == NULL)
1301 return(ENOMEM);
1302
1303 rrcopy->raidPtr = (void *) raidPtr;
1304 rrcopy->col = column;
1305
1306 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1307 rf_ReconstructInPlaceThread,
1308 rrcopy,"raid_reconip");
1309 return(retcode);
1310
1311 case RAIDFRAME_GET_INFO:
1312 if (!raidPtr->valid)
1313 return (ENODEV);
1314 ucfgp = (RF_DeviceConfig_t **) data;
1315 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1316 (RF_DeviceConfig_t *));
1317 if (d_cfg == NULL)
1318 return (ENOMEM);
1319 d_cfg->rows = 1; /* there is only 1 row now */
1320 d_cfg->cols = raidPtr->numCol;
1321 d_cfg->ndevs = raidPtr->numCol;
1322 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1323 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1324 return (ENOMEM);
1325 }
1326 d_cfg->nspares = raidPtr->numSpare;
1327 if (d_cfg->nspares >= RF_MAX_DISKS) {
1328 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1329 return (ENOMEM);
1330 }
1331 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1332 d = 0;
1333 for (j = 0; j < d_cfg->cols; j++) {
1334 d_cfg->devs[d] = raidPtr->Disks[j];
1335 d++;
1336 }
1337 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1338 d_cfg->spares[i] = raidPtr->Disks[j];
1339 }
1340 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1341 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1342
1343 return (retcode);
1344
1345 case RAIDFRAME_CHECK_PARITY:
1346 *(int *) data = raidPtr->parity_good;
1347 return (0);
1348
1349 case RAIDFRAME_RESET_ACCTOTALS:
1350 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1351 return (0);
1352
1353 case RAIDFRAME_GET_ACCTOTALS:
1354 totals = (RF_AccTotals_t *) data;
1355 *totals = raidPtr->acc_totals;
1356 return (0);
1357
1358 case RAIDFRAME_KEEP_ACCTOTALS:
1359 raidPtr->keep_acc_totals = *(int *)data;
1360 return (0);
1361
1362 case RAIDFRAME_GET_SIZE:
1363 *(int *) data = raidPtr->totalSectors;
1364 return (0);
1365
1366 /* fail a disk & optionally start reconstruction */
1367 case RAIDFRAME_FAIL_DISK:
1368
1369 if (raidPtr->Layout.map->faultsTolerated == 0) {
1370 /* Can't do this on a RAID 0!! */
1371 return(EINVAL);
1372 }
1373
1374 rr = (struct rf_recon_req *) data;
1375 rr->row = 0;
1376 if (rr->col < 0 || rr->col >= raidPtr->numCol)
1377 return (EINVAL);
1378
1379
1380 RF_LOCK_MUTEX(raidPtr->mutex);
1381 if (raidPtr->status == rf_rs_reconstructing) {
1382 /* you can't fail a disk while we're reconstructing! */
1383 /* XXX wrong for RAID6 */
1384 RF_UNLOCK_MUTEX(raidPtr->mutex);
1385 return (EINVAL);
1386 }
1387 if ((raidPtr->Disks[rr->col].status ==
1388 rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1389 /* some other component has failed. Let's not make
1390 things worse. XXX wrong for RAID6 */
1391 RF_UNLOCK_MUTEX(raidPtr->mutex);
1392 return (EINVAL);
1393 }
1394 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1395 /* Can't fail a spared disk! */
1396 RF_UNLOCK_MUTEX(raidPtr->mutex);
1397 return (EINVAL);
1398 }
1399 RF_UNLOCK_MUTEX(raidPtr->mutex);
1400
1401 /* make a copy of the recon request so that we don't rely on
1402 * the user's buffer */
1403 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1404 if (rrcopy == NULL)
1405 return(ENOMEM);
1406 memcpy(rrcopy, rr, sizeof(*rr));
1407 rrcopy->raidPtr = (void *) raidPtr;
1408
1409 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1410 rf_ReconThread,
1411 rrcopy,"raid_recon");
1412 return (0);
1413
1414 /* invoke a copyback operation after recon on whatever disk
1415 * needs it, if any */
1416 case RAIDFRAME_COPYBACK:
1417
1418 if (raidPtr->Layout.map->faultsTolerated == 0) {
1419 /* This makes no sense on a RAID 0!! */
1420 return(EINVAL);
1421 }
1422
1423 if (raidPtr->copyback_in_progress == 1) {
1424 /* Copyback is already in progress! */
1425 return(EINVAL);
1426 }
1427
1428 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1429 rf_CopybackThread,
1430 raidPtr,"raid_copyback");
1431 return (retcode);
1432
1433 /* return the percentage completion of reconstruction */
1434 case RAIDFRAME_CHECK_RECON_STATUS:
1435 if (raidPtr->Layout.map->faultsTolerated == 0) {
1436 /* This makes no sense on a RAID 0, so tell the
1437 user it's done. */
1438 *(int *) data = 100;
1439 return(0);
1440 }
1441 if (raidPtr->status != rf_rs_reconstructing)
1442 *(int *) data = 100;
1443 else {
1444 if (raidPtr->reconControl->numRUsTotal > 0) {
1445 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1446 } else {
1447 *(int *) data = 0;
1448 }
1449 }
1450 return (0);
1451 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1452 progressInfoPtr = (RF_ProgressInfo_t **) data;
1453 if (raidPtr->status != rf_rs_reconstructing) {
1454 progressInfo.remaining = 0;
1455 progressInfo.completed = 100;
1456 progressInfo.total = 100;
1457 } else {
1458 progressInfo.total =
1459 raidPtr->reconControl->numRUsTotal;
1460 progressInfo.completed =
1461 raidPtr->reconControl->numRUsComplete;
1462 progressInfo.remaining = progressInfo.total -
1463 progressInfo.completed;
1464 }
1465 retcode = copyout(&progressInfo, *progressInfoPtr,
1466 sizeof(RF_ProgressInfo_t));
1467 return (retcode);
1468
1469 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1470 if (raidPtr->Layout.map->faultsTolerated == 0) {
1471 /* This makes no sense on a RAID 0, so tell the
1472 user it's done. */
1473 *(int *) data = 100;
1474 return(0);
1475 }
1476 if (raidPtr->parity_rewrite_in_progress == 1) {
1477 *(int *) data = 100 *
1478 raidPtr->parity_rewrite_stripes_done /
1479 raidPtr->Layout.numStripe;
1480 } else {
1481 *(int *) data = 100;
1482 }
1483 return (0);
1484
1485 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1486 progressInfoPtr = (RF_ProgressInfo_t **) data;
1487 if (raidPtr->parity_rewrite_in_progress == 1) {
1488 progressInfo.total = raidPtr->Layout.numStripe;
1489 progressInfo.completed =
1490 raidPtr->parity_rewrite_stripes_done;
1491 progressInfo.remaining = progressInfo.total -
1492 progressInfo.completed;
1493 } else {
1494 progressInfo.remaining = 0;
1495 progressInfo.completed = 100;
1496 progressInfo.total = 100;
1497 }
1498 retcode = copyout(&progressInfo, *progressInfoPtr,
1499 sizeof(RF_ProgressInfo_t));
1500 return (retcode);
1501
1502 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1503 if (raidPtr->Layout.map->faultsTolerated == 0) {
1504 /* This makes no sense on a RAID 0 */
1505 *(int *) data = 100;
1506 return(0);
1507 }
1508 if (raidPtr->copyback_in_progress == 1) {
1509 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1510 raidPtr->Layout.numStripe;
1511 } else {
1512 *(int *) data = 100;
1513 }
1514 return (0);
1515
1516 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1517 progressInfoPtr = (RF_ProgressInfo_t **) data;
1518 if (raidPtr->copyback_in_progress == 1) {
1519 progressInfo.total = raidPtr->Layout.numStripe;
1520 progressInfo.completed =
1521 raidPtr->copyback_stripes_done;
1522 progressInfo.remaining = progressInfo.total -
1523 progressInfo.completed;
1524 } else {
1525 progressInfo.remaining = 0;
1526 progressInfo.completed = 100;
1527 progressInfo.total = 100;
1528 }
1529 retcode = copyout(&progressInfo, *progressInfoPtr,
1530 sizeof(RF_ProgressInfo_t));
1531 return (retcode);
1532
1533 /* the sparetable daemon calls this to wait for the kernel to
1534 * need a spare table. this ioctl does not return until a
1535 * spare table is needed. XXX -- calling mpsleep here in the
1536 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1537 * -- I should either compute the spare table in the kernel,
1538 * or have a different -- XXX XXX -- interface (a different
1539 * character device) for delivering the table -- XXX */
1540 #if 0
1541 case RAIDFRAME_SPARET_WAIT:
1542 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1543 while (!rf_sparet_wait_queue)
1544 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1545 waitreq = rf_sparet_wait_queue;
1546 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1547 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1548
1549 /* structure assignment */
1550 *((RF_SparetWait_t *) data) = *waitreq;
1551
1552 RF_Free(waitreq, sizeof(*waitreq));
1553 return (0);
1554
1555 /* wakes up a process waiting on SPARET_WAIT and puts an error
1556 * code in it that will cause the dameon to exit */
1557 case RAIDFRAME_ABORT_SPARET_WAIT:
1558 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1559 waitreq->fcol = -1;
1560 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1561 waitreq->next = rf_sparet_wait_queue;
1562 rf_sparet_wait_queue = waitreq;
1563 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1564 wakeup(&rf_sparet_wait_queue);
1565 return (0);
1566
1567 /* used by the spare table daemon to deliver a spare table
1568 * into the kernel */
1569 case RAIDFRAME_SEND_SPARET:
1570
1571 /* install the spare table */
1572 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1573
1574 /* respond to the requestor. the return status of the spare
1575 * table installation is passed in the "fcol" field */
1576 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1577 waitreq->fcol = retcode;
1578 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1579 waitreq->next = rf_sparet_resp_queue;
1580 rf_sparet_resp_queue = waitreq;
1581 wakeup(&rf_sparet_resp_queue);
1582 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1583
1584 return (retcode);
1585 #endif
1586
1587 default:
1588 break; /* fall through to the os-specific code below */
1589
1590 }
1591
1592 if (!raidPtr->valid)
1593 return (EINVAL);
1594
1595 /*
1596 * Add support for "regular" device ioctls here.
1597 */
1598
1599 switch (cmd) {
1600 case DIOCGDINFO:
1601 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1602 break;
1603 #ifdef __HAVE_OLD_DISKLABEL
1604 case ODIOCGDINFO:
1605 newlabel = *(rs->sc_dkdev.dk_label);
1606 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1607 return ENOTTY;
1608 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1609 break;
1610 #endif
1611
1612 case DIOCGPART:
1613 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1614 ((struct partinfo *) data)->part =
1615 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1616 break;
1617
1618 case DIOCWDINFO:
1619 case DIOCSDINFO:
1620 #ifdef __HAVE_OLD_DISKLABEL
1621 case ODIOCWDINFO:
1622 case ODIOCSDINFO:
1623 #endif
1624 {
1625 struct disklabel *lp;
1626 #ifdef __HAVE_OLD_DISKLABEL
1627 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1628 memset(&newlabel, 0, sizeof newlabel);
1629 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1630 lp = &newlabel;
1631 } else
1632 #endif
1633 lp = (struct disklabel *)data;
1634
1635 if ((error = raidlock(rs)) != 0)
1636 return (error);
1637
1638 rs->sc_flags |= RAIDF_LABELLING;
1639
1640 error = setdisklabel(rs->sc_dkdev.dk_label,
1641 lp, 0, rs->sc_dkdev.dk_cpulabel);
1642 if (error == 0) {
1643 if (cmd == DIOCWDINFO
1644 #ifdef __HAVE_OLD_DISKLABEL
1645 || cmd == ODIOCWDINFO
1646 #endif
1647 )
1648 error = writedisklabel(RAIDLABELDEV(dev),
1649 raidstrategy, rs->sc_dkdev.dk_label,
1650 rs->sc_dkdev.dk_cpulabel);
1651 }
1652 rs->sc_flags &= ~RAIDF_LABELLING;
1653
1654 raidunlock(rs);
1655
1656 if (error)
1657 return (error);
1658 break;
1659 }
1660
1661 case DIOCWLABEL:
1662 if (*(int *) data != 0)
1663 rs->sc_flags |= RAIDF_WLABEL;
1664 else
1665 rs->sc_flags &= ~RAIDF_WLABEL;
1666 break;
1667
1668 case DIOCGDEFLABEL:
1669 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1670 break;
1671
1672 #ifdef __HAVE_OLD_DISKLABEL
1673 case ODIOCGDEFLABEL:
1674 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1675 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1676 return ENOTTY;
1677 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1678 break;
1679 #endif
1680
1681 case DIOCAWEDGE:
1682 case DIOCDWEDGE:
1683 dkw = (void *)data;
1684
1685 /* If the ioctl happens here, the parent is us. */
1686 (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1687 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1688
1689 case DIOCLWEDGES:
1690 return dkwedge_list(&rs->sc_dkdev,
1691 (struct dkwedge_list *)data, l);
1692
1693 default:
1694 retcode = ENOTTY;
1695 }
1696 return (retcode);
1697
1698 }
1699
1700
1701 /* raidinit -- complete the rest of the initialization for the
1702 RAIDframe device. */
1703
1704
1705 static void
1706 raidinit(RF_Raid_t *raidPtr)
1707 {
1708 struct cfdata *cf;
1709 struct raid_softc *rs;
1710 int unit;
1711
1712 unit = raidPtr->raidid;
1713
1714 rs = &raid_softc[unit];
1715
1716 /* XXX should check return code first... */
1717 rs->sc_flags |= RAIDF_INITED;
1718
1719 /* XXX doesn't check bounds. */
1720 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1721
1722 rs->sc_dkdev.dk_name = rs->sc_xname;
1723
1724 /* attach the pseudo device */
1725 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1726 cf->cf_name = raid_cd.cd_name;
1727 cf->cf_atname = raid_cd.cd_name;
1728 cf->cf_unit = unit;
1729 cf->cf_fstate = FSTATE_STAR;
1730
1731 rs->sc_dev = config_attach_pseudo(cf);
1732
1733 if (rs->sc_dev==NULL) {
1734 printf("raid%d: config_attach_pseudo failed\n",
1735 raidPtr->raidid);
1736 }
1737
1738 /* disk_attach actually creates space for the CPU disklabel, among
1739 * other things, so it's critical to call this *BEFORE* we try putzing
1740 * with disklabels. */
1741
1742 disk_attach(&rs->sc_dkdev);
1743
1744 /* XXX There may be a weird interaction here between this, and
1745 * protectedSectors, as used in RAIDframe. */
1746
1747 rs->sc_size = raidPtr->totalSectors;
1748 }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Hand a spare-table request to the daemon and wait for its answer.
 *
 * The request is pushed on the head of rf_sparet_wait_queue and sleepers
 * on that queue are woken.  The caller then sleeps until an entry shows
 * up on rf_sparet_resp_queue, removes the head entry, and returns the
 * value of its "fcol" field.  The response entry is freed here.
 *
 * XXX the entries in the queues should be tagged with the raidPtr so
 * that, in the extremely rare case that two recons happen at once, we
 * know for which device we're requesting a spare table.
 *
 * XXX This code is not currently used.  GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* post the request and poke whoever is waiting for one */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): the historical comment here said "mpsleep unlocks
	 * the mutex", but the call is tsleep(), which is given no lock --
	 * confirm whether sleeping with rf_sparet_wait_mutex held is
	 * intended.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* take the first response off the queue */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	/* this is not the same entry as the one the caller passed in */
	RF_Free(resp, sizeof(*resp));
	return (status);
}
#endif
1784
1785 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1786 * bp & passes it down.
1787 * any calls originating in the kernel must use non-blocking I/O
1788 * do some extra sanity checking to return "appropriate" error values for
1789 * certain conditions (to make some standard utilities work)
1790 *
1791 * Formerly known as: rf_DoAccessKernel
1792 */
1793 void
1794 raidstart(RF_Raid_t *raidPtr)
1795 {
1796 RF_SectorCount_t num_blocks, pb, sum;
1797 RF_RaidAddr_t raid_addr;
1798 struct partition *pp;
1799 daddr_t blocknum;
1800 int unit;
1801 struct raid_softc *rs;
1802 int do_async;
1803 struct buf *bp;
1804 int rc;
1805
1806 unit = raidPtr->raidid;
1807 rs = &raid_softc[unit];
1808
1809 /* quick check to see if anything has died recently */
1810 RF_LOCK_MUTEX(raidPtr->mutex);
1811 if (raidPtr->numNewFailures > 0) {
1812 RF_UNLOCK_MUTEX(raidPtr->mutex);
1813 rf_update_component_labels(raidPtr,
1814 RF_NORMAL_COMPONENT_UPDATE);
1815 RF_LOCK_MUTEX(raidPtr->mutex);
1816 raidPtr->numNewFailures--;
1817 }
1818
1819 /* Check to see if we're at the limit... */
1820 while (raidPtr->openings > 0) {
1821 RF_UNLOCK_MUTEX(raidPtr->mutex);
1822
1823 /* get the next item, if any, from the queue */
1824 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
1825 /* nothing more to do */
1826 return;
1827 }
1828
1829 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1830 * partition.. Need to make it absolute to the underlying
1831 * device.. */
1832
1833 blocknum = bp->b_blkno;
1834 if (DISKPART(bp->b_dev) != RAW_PART) {
1835 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1836 blocknum += pp->p_offset;
1837 }
1838
1839 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1840 (int) blocknum));
1841
1842 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1843 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1844
1845 /* *THIS* is where we adjust what block we're going to...
1846 * but DO NOT TOUCH bp->b_blkno!!! */
1847 raid_addr = blocknum;
1848
1849 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1850 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1851 sum = raid_addr + num_blocks + pb;
1852 if (1 || rf_debugKernelAccess) {
1853 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1854 (int) raid_addr, (int) sum, (int) num_blocks,
1855 (int) pb, (int) bp->b_resid));
1856 }
1857 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1858 || (sum < num_blocks) || (sum < pb)) {
1859 biodone(bp, ENOSPC, bp->b_bcount);
1860 RF_LOCK_MUTEX(raidPtr->mutex);
1861 continue;
1862 }
1863 /*
1864 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1865 */
1866
1867 if (bp->b_bcount & raidPtr->sectorMask) {
1868 biodone(bp, EINVAL, bp->b_bcount);
1869 RF_LOCK_MUTEX(raidPtr->mutex);
1870 continue;
1871
1872 }
1873 db1_printf(("Calling DoAccess..\n"));
1874
1875
1876 RF_LOCK_MUTEX(raidPtr->mutex);
1877 raidPtr->openings--;
1878 RF_UNLOCK_MUTEX(raidPtr->mutex);
1879
1880 /*
1881 * Everything is async.
1882 */
1883 do_async = 1;
1884
1885 disk_busy(&rs->sc_dkdev);
1886
1887 /* XXX we're still at splbio() here... do we *really*
1888 need to be? */
1889
1890 /* don't ever condition on bp->b_flags & B_WRITE.
1891 * always condition on B_READ instead */
1892
1893 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1894 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1895 do_async, raid_addr, num_blocks,
1896 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1897
1898 if (rc) {
1899 biodone(bp, rc, bp->b_bcount);
1900 /* continue loop */
1901 }
1902
1903 RF_LOCK_MUTEX(raidPtr->mutex);
1904 }
1905 RF_UNLOCK_MUTEX(raidPtr->mutex);
1906 }
1907
1908
1909
1910
1911 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1912
1913 int
1914 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1915 {
1916 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1917 struct buf *bp;
1918
1919 req->queue = queue;
1920
1921 #if DIAGNOSTIC
1922 if (queue->raidPtr->raidid >= numraid) {
1923 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1924 numraid);
1925 panic("Invalid Unit number in rf_DispatchKernelIO");
1926 }
1927 #endif
1928
1929 bp = req->bp;
1930
1931 switch (req->type) {
1932 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1933 /* XXX need to do something extra here.. */
1934 /* I'm leaving this in, as I've never actually seen it used,
1935 * and I'd like folks to report it... GO */
1936 printf(("WAKEUP CALLED\n"));
1937 queue->numOutstanding++;
1938
1939 bp->b_flags = 0;
1940 bp->b_private = req;
1941
1942 KernelWakeupFunc(bp);
1943 break;
1944
1945 case RF_IO_TYPE_READ:
1946 case RF_IO_TYPE_WRITE:
1947 #if RF_ACC_TRACE > 0
1948 if (req->tracerec) {
1949 RF_ETIMER_START(req->tracerec->timer);
1950 }
1951 #endif
1952 InitBP(bp, queue->rf_cinfo->ci_vp,
1953 op, queue->rf_cinfo->ci_dev,
1954 req->sectorOffset, req->numSector,
1955 req->buf, KernelWakeupFunc, (void *) req,
1956 queue->raidPtr->logBytesPerSector, req->b_proc);
1957
1958 if (rf_debugKernelAccess) {
1959 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1960 (long) bp->b_blkno));
1961 }
1962 queue->numOutstanding++;
1963 queue->last_deq_sector = req->sectorOffset;
1964 /* acc wouldn't have been let in if there were any pending
1965 * reqs at any other priority */
1966 queue->curPriority = req->priority;
1967
1968 db1_printf(("Going for %c to unit %d col %d\n",
1969 req->type, queue->raidPtr->raidid,
1970 queue->col));
1971 db1_printf(("sector %d count %d (%d bytes) %d\n",
1972 (int) req->sectorOffset, (int) req->numSector,
1973 (int) (req->numSector <<
1974 queue->raidPtr->logBytesPerSector),
1975 (int) queue->raidPtr->logBytesPerSector));
1976 VOP_STRATEGY(bp->b_vp, bp);
1977
1978 break;
1979
1980 default:
1981 panic("bad req->type in rf_DispatchKernelIO");
1982 }
1983 db1_printf(("Exiting from DispatchKernelIO\n"));
1984
1985 return (0);
1986 }
1987 /* this is the callback function associated with a I/O invoked from
1988 kernel code.
1989 */
1990 static void
1991 KernelWakeupFunc(struct buf *bp)
1992 {
1993 RF_DiskQueueData_t *req = NULL;
1994 RF_DiskQueue_t *queue;
1995 int s;
1996
1997 s = splbio();
1998 db1_printf(("recovering the request queue:\n"));
1999 req = bp->b_private;
2000
2001 queue = (RF_DiskQueue_t *) req->queue;
2002
2003 #if RF_ACC_TRACE > 0
2004 if (req->tracerec) {
2005 RF_ETIMER_STOP(req->tracerec->timer);
2006 RF_ETIMER_EVAL(req->tracerec->timer);
2007 RF_LOCK_MUTEX(rf_tracing_mutex);
2008 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2009 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2010 req->tracerec->num_phys_ios++;
2011 RF_UNLOCK_MUTEX(rf_tracing_mutex);
2012 }
2013 #endif
2014
2015 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
2016 * ballistic, and mark the component as hosed... */
2017
2018 if (bp->b_flags & B_ERROR) {
2019 /* Mark the disk as dead */
2020 /* but only mark it once... */
2021 /* and only if it wouldn't leave this RAID set
2022 completely broken */
2023 if (((queue->raidPtr->Disks[queue->col].status ==
2024 rf_ds_optimal) ||
2025 (queue->raidPtr->Disks[queue->col].status ==
2026 rf_ds_used_spare)) &&
2027 (queue->raidPtr->numFailures <
2028 queue->raidPtr->Layout.map->faultsTolerated)) {
2029 printf("raid%d: IO Error. Marking %s as failed.\n",
2030 queue->raidPtr->raidid,
2031 queue->raidPtr->Disks[queue->col].devname);
2032 queue->raidPtr->Disks[queue->col].status =
2033 rf_ds_failed;
2034 queue->raidPtr->status = rf_rs_degraded;
2035 queue->raidPtr->numFailures++;
2036 queue->raidPtr->numNewFailures++;
2037 } else { /* Disk is already dead... */
2038 /* printf("Disk already marked as dead!\n"); */
2039 }
2040
2041 }
2042
2043 /* Fill in the error value */
2044
2045 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
2046
2047 simple_lock(&queue->raidPtr->iodone_lock);
2048
2049 /* Drop this one on the "finished" queue... */
2050 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2051
2052 /* Let the raidio thread know there is work to be done. */
2053 wakeup(&(queue->raidPtr->iodone));
2054
2055 simple_unlock(&queue->raidPtr->iodone_lock);
2056
2057 splx(s);
2058 }
2059
2060
2061
2062 /*
2063 * initialize a buf structure for doing an I/O in the kernel.
2064 */
2065 static void
2066 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2067 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2068 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2069 struct proc *b_proc)
2070 {
2071 /* bp->b_flags = B_PHYS | rw_flag; */
2072 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2073 bp->b_bcount = numSect << logBytesPerSector;
2074 bp->b_bufsize = bp->b_bcount;
2075 bp->b_error = 0;
2076 bp->b_dev = dev;
2077 bp->b_data = bf;
2078 bp->b_blkno = startSect;
2079 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
2080 if (bp->b_bcount == 0) {
2081 panic("bp->b_bcount is zero in InitBP!!");
2082 }
2083 bp->b_proc = b_proc;
2084 bp->b_iodone = cbFunc;
2085 bp->b_private = cbArg;
2086 bp->b_vp = b_vp;
2087 if ((bp->b_flags & B_READ) == 0) {
2088 bp->b_vp->v_numoutput++;
2089 }
2090
2091 }
2092
2093 static void
2094 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2095 struct disklabel *lp)
2096 {
2097 memset(lp, 0, sizeof(*lp));
2098
2099 /* fabricate a label... */
2100 lp->d_secperunit = raidPtr->totalSectors;
2101 lp->d_secsize = raidPtr->bytesPerSector;
2102 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2103 lp->d_ntracks = 4 * raidPtr->numCol;
2104 lp->d_ncylinders = raidPtr->totalSectors /
2105 (lp->d_nsectors * lp->d_ntracks);
2106 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2107
2108 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2109 lp->d_type = DTYPE_RAID;
2110 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2111 lp->d_rpm = 3600;
2112 lp->d_interleave = 1;
2113 lp->d_flags = 0;
2114
2115 lp->d_partitions[RAW_PART].p_offset = 0;
2116 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2117 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2118 lp->d_npartitions = RAW_PART + 1;
2119
2120 lp->d_magic = DISKMAGIC;
2121 lp->d_magic2 = DISKMAGIC;
2122 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2123
2124 }
2125 /*
2126 * Read the disklabel from the raid device. If one is not present, fake one
2127 * up.
2128 */
2129 static void
2130 raidgetdisklabel(dev_t dev)
2131 {
2132 int unit = raidunit(dev);
2133 struct raid_softc *rs = &raid_softc[unit];
2134 const char *errstring;
2135 struct disklabel *lp = rs->sc_dkdev.dk_label;
2136 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2137 RF_Raid_t *raidPtr;
2138
2139 db1_printf(("Getting the disklabel...\n"));
2140
2141 memset(clp, 0, sizeof(*clp));
2142
2143 raidPtr = raidPtrs[unit];
2144
2145 raidgetdefaultlabel(raidPtr, rs, lp);
2146
2147 /*
2148 * Call the generic disklabel extraction routine.
2149 */
2150 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2151 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2152 if (errstring)
2153 raidmakedisklabel(rs);
2154 else {
2155 int i;
2156 struct partition *pp;
2157
2158 /*
2159 * Sanity check whether the found disklabel is valid.
2160 *
2161 * This is necessary since total size of the raid device
2162 * may vary when an interleave is changed even though exactly
2163 * same components are used, and old disklabel may used
2164 * if that is found.
2165 */
2166 if (lp->d_secperunit != rs->sc_size)
2167 printf("raid%d: WARNING: %s: "
2168 "total sector size in disklabel (%d) != "
2169 "the size of raid (%ld)\n", unit, rs->sc_xname,
2170 lp->d_secperunit, (long) rs->sc_size);
2171 for (i = 0; i < lp->d_npartitions; i++) {
2172 pp = &lp->d_partitions[i];
2173 if (pp->p_offset + pp->p_size > rs->sc_size)
2174 printf("raid%d: WARNING: %s: end of partition `%c' "
2175 "exceeds the size of raid (%ld)\n",
2176 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2177 }
2178 }
2179
2180 }
2181 /*
2182 * Take care of things one might want to take care of in the event
2183 * that a disklabel isn't present.
2184 */
2185 static void
2186 raidmakedisklabel(struct raid_softc *rs)
2187 {
2188 struct disklabel *lp = rs->sc_dkdev.dk_label;
2189 db1_printf(("Making a label..\n"));
2190
2191 /*
2192 * For historical reasons, if there's no disklabel present
2193 * the raw partition must be marked FS_BSDFFS.
2194 */
2195
2196 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2197
2198 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2199
2200 lp->d_checksum = dkcksum(lp);
2201 }
2202 /*
2203 * Wait interruptibly for an exclusive lock.
2204 *
2205 * XXX
2206 * Several drivers do this; it should be abstracted and made MP-safe.
2207 * (Hmm... where have we seen this warning before :-> GO )
2208 */
2209 static int
2210 raidlock(struct raid_softc *rs)
2211 {
2212 int error;
2213
2214 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2215 rs->sc_flags |= RAIDF_WANTED;
2216 if ((error =
2217 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2218 return (error);
2219 }
2220 rs->sc_flags |= RAIDF_LOCKED;
2221 return (0);
2222 }
2223 /*
2224 * Unlock and wake up any waiters.
2225 */
2226 static void
2227 raidunlock(struct raid_softc *rs)
2228 {
2229
2230 rs->sc_flags &= ~RAIDF_LOCKED;
2231 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2232 rs->sc_flags &= ~RAIDF_WANTED;
2233 wakeup(rs);
2234 }
2235 }
2236
2237
/*
 * Placement of the component label on a component, in bytes.  The label
 * read/write routines in this file start their transfer at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE and move RF_COMPONENT_INFO_SIZE
 * bytes.
 */
#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2240
2241 int
2242 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2243 {
2244 RF_ComponentLabel_t clabel;
2245 raidread_component_label(dev, b_vp, &clabel);
2246 clabel.mod_counter = mod_counter;
2247 clabel.clean = RF_RAID_CLEAN;
2248 raidwrite_component_label(dev, b_vp, &clabel);
2249 return(0);
2250 }
2251
2252
2253 int
2254 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2255 {
2256 RF_ComponentLabel_t clabel;
2257 raidread_component_label(dev, b_vp, &clabel);
2258 clabel.mod_counter = mod_counter;
2259 clabel.clean = RF_RAID_DIRTY;
2260 raidwrite_component_label(dev, b_vp, &clabel);
2261 return(0);
2262 }
2263
2264 /* ARGSUSED */
2265 int
2266 raidread_component_label(dev_t dev, struct vnode *b_vp,
2267 RF_ComponentLabel_t *clabel)
2268 {
2269 struct buf *bp;
2270 const struct bdevsw *bdev;
2271 int error;
2272
2273 /* XXX should probably ensure that we don't try to do this if
2274 someone has changed rf_protected_sectors. */
2275
2276 if (b_vp == NULL) {
2277 /* For whatever reason, this component is not valid.
2278 Don't try to read a component label from it. */
2279 return(EINVAL);
2280 }
2281
2282 /* get a block of the appropriate size... */
2283 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2284 bp->b_dev = dev;
2285
2286 /* get our ducks in a row for the read */
2287 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2288 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2289 bp->b_flags |= B_READ;
2290 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2291
2292 bdev = bdevsw_lookup(bp->b_dev);
2293 if (bdev == NULL)
2294 return (ENXIO);
2295 (*bdev->d_strategy)(bp);
2296
2297 error = biowait(bp);
2298
2299 if (!error) {
2300 memcpy(clabel, bp->b_data,
2301 sizeof(RF_ComponentLabel_t));
2302 }
2303
2304 brelse(bp, 0);
2305 return(error);
2306 }
2307 /* ARGSUSED */
2308 int
2309 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2310 RF_ComponentLabel_t *clabel)
2311 {
2312 struct buf *bp;
2313 const struct bdevsw *bdev;
2314 int error;
2315
2316 /* get a block of the appropriate size... */
2317 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2318 bp->b_dev = dev;
2319
2320 /* get our ducks in a row for the write */
2321 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2322 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2323 bp->b_flags |= B_WRITE;
2324 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2325
2326 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2327
2328 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2329
2330 bdev = bdevsw_lookup(bp->b_dev);
2331 if (bdev == NULL)
2332 return (ENXIO);
2333 (*bdev->d_strategy)(bp);
2334 error = biowait(bp);
2335 brelse(bp, 0);
2336 if (error) {
2337 #if 1
2338 printf("Failed to write RAID component info!\n");
2339 #endif
2340 }
2341
2342 return(error);
2343 }
2344
2345 void
2346 rf_markalldirty(RF_Raid_t *raidPtr)
2347 {
2348 RF_ComponentLabel_t clabel;
2349 int sparecol;
2350 int c;
2351 int j;
2352 int scol = -1;
2353
2354 raidPtr->mod_counter++;
2355 for (c = 0; c < raidPtr->numCol; c++) {
2356 /* we don't want to touch (at all) a disk that has
2357 failed */
2358 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2359 raidread_component_label(
2360 raidPtr->Disks[c].dev,
2361 raidPtr->raid_cinfo[c].ci_vp,
2362 &clabel);
2363 if (clabel.status == rf_ds_spared) {
2364 /* XXX do something special...
2365 but whatever you do, don't
2366 try to access it!! */
2367 } else {
2368 raidmarkdirty(
2369 raidPtr->Disks[c].dev,
2370 raidPtr->raid_cinfo[c].ci_vp,
2371 raidPtr->mod_counter);
2372 }
2373 }
2374 }
2375
2376 for( c = 0; c < raidPtr->numSpare ; c++) {
2377 sparecol = raidPtr->numCol + c;
2378 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2379 /*
2380
2381 we claim this disk is "optimal" if it's
2382 rf_ds_used_spare, as that means it should be
2383 directly substitutable for the disk it replaced.
2384 We note that too...
2385
2386 */
2387
2388 for(j=0;j<raidPtr->numCol;j++) {
2389 if (raidPtr->Disks[j].spareCol == sparecol) {
2390 scol = j;
2391 break;
2392 }
2393 }
2394
2395 raidread_component_label(
2396 raidPtr->Disks[sparecol].dev,
2397 raidPtr->raid_cinfo[sparecol].ci_vp,
2398 &clabel);
2399 /* make sure status is noted */
2400
2401 raid_init_component_label(raidPtr, &clabel);
2402
2403 clabel.row = 0;
2404 clabel.column = scol;
2405 /* Note: we *don't* change status from rf_ds_used_spare
2406 to rf_ds_optimal */
2407 /* clabel.status = rf_ds_optimal; */
2408
2409 raidmarkdirty(raidPtr->Disks[sparecol].dev,
2410 raidPtr->raid_cinfo[sparecol].ci_vp,
2411 raidPtr->mod_counter);
2412 }
2413 }
2414 }
2415
2416
2417 void
2418 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2419 {
2420 RF_ComponentLabel_t clabel;
2421 int sparecol;
2422 int c;
2423 int j;
2424 int scol;
2425
2426 scol = -1;
2427
2428 /* XXX should do extra checks to make sure things really are clean,
2429 rather than blindly setting the clean bit... */
2430
2431 raidPtr->mod_counter++;
2432
2433 for (c = 0; c < raidPtr->numCol; c++) {
2434 if (raidPtr->Disks[c].status == rf_ds_optimal) {
2435 raidread_component_label(
2436 raidPtr->Disks[c].dev,
2437 raidPtr->raid_cinfo[c].ci_vp,
2438 &clabel);
2439 /* make sure status is noted */
2440 clabel.status = rf_ds_optimal;
2441
2442 /* bump the counter */
2443 clabel.mod_counter = raidPtr->mod_counter;
2444
2445 /* note what unit we are configured as */
2446 clabel.last_unit = raidPtr->raidid;
2447
2448 raidwrite_component_label(
2449 raidPtr->Disks[c].dev,
2450 raidPtr->raid_cinfo[c].ci_vp,
2451 &clabel);
2452 if (final == RF_FINAL_COMPONENT_UPDATE) {
2453 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2454 raidmarkclean(
2455 raidPtr->Disks[c].dev,
2456 raidPtr->raid_cinfo[c].ci_vp,
2457 raidPtr->mod_counter);
2458 }
2459 }
2460 }
2461 /* else we don't touch it.. */
2462 }
2463
2464 for( c = 0; c < raidPtr->numSpare ; c++) {
2465 sparecol = raidPtr->numCol + c;
2466 /* Need to ensure that the reconstruct actually completed! */
2467 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
2468 /*
2469
2470 we claim this disk is "optimal" if it's
2471 rf_ds_used_spare, as that means it should be
2472 directly substitutable for the disk it replaced.
2473 We note that too...
2474
2475 */
2476
2477 for(j=0;j<raidPtr->numCol;j++) {
2478 if (raidPtr->Disks[j].spareCol == sparecol) {
2479 scol = j;
2480 break;
2481 }
2482 }
2483
2484 /* XXX shouldn't *really* need this... */
2485 raidread_component_label(
2486 raidPtr->Disks[sparecol].dev,
2487 raidPtr->raid_cinfo[sparecol].ci_vp,
2488 &clabel);
2489 /* make sure status is noted */
2490
2491 raid_init_component_label(raidPtr, &clabel);
2492
2493 clabel.mod_counter = raidPtr->mod_counter;
2494 clabel.column = scol;
2495 clabel.status = rf_ds_optimal;
2496 clabel.last_unit = raidPtr->raidid;
2497
2498 raidwrite_component_label(
2499 raidPtr->Disks[sparecol].dev,
2500 raidPtr->raid_cinfo[sparecol].ci_vp,
2501 &clabel);
2502 if (final == RF_FINAL_COMPONENT_UPDATE) {
2503 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2504 raidmarkclean( raidPtr->Disks[sparecol].dev,
2505 raidPtr->raid_cinfo[sparecol].ci_vp,
2506 raidPtr->mod_counter);
2507 }
2508 }
2509 }
2510 }
2511 }
2512
2513 void
2514 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2515 {
2516 struct proc *p;
2517 struct lwp *l;
2518
2519 p = raidPtr->engine_thread;
2520 l = LIST_FIRST(&p->p_lwps);
2521
2522 if (vp != NULL) {
2523 if (auto_configured == 1) {
2524 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2525 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2526 vput(vp);
2527
2528 } else {
2529 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
2530 }
2531 }
2532 }
2533
2534
2535 void
2536 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2537 {
2538 int r,c;
2539 struct vnode *vp;
2540 int acd;
2541
2542
2543 /* We take this opportunity to close the vnodes like we should.. */
2544
2545 for (c = 0; c < raidPtr->numCol; c++) {
2546 vp = raidPtr->raid_cinfo[c].ci_vp;
2547 acd = raidPtr->Disks[c].auto_configured;
2548 rf_close_component(raidPtr, vp, acd);
2549 raidPtr->raid_cinfo[c].ci_vp = NULL;
2550 raidPtr->Disks[c].auto_configured = 0;
2551 }
2552
2553 for (r = 0; r < raidPtr->numSpare; r++) {
2554 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2555 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2556 rf_close_component(raidPtr, vp, acd);
2557 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2558 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2559 }
2560 }
2561
2562
2563 void
2564 rf_ReconThread(struct rf_recon_req *req)
2565 {
2566 int s;
2567 RF_Raid_t *raidPtr;
2568
2569 s = splbio();
2570 raidPtr = (RF_Raid_t *) req->raidPtr;
2571 raidPtr->recon_in_progress = 1;
2572
2573 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2574 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2575
2576 RF_Free(req, sizeof(*req));
2577
2578 raidPtr->recon_in_progress = 0;
2579 splx(s);
2580
2581 /* That's all... */
2582 kthread_exit(0); /* does not return */
2583 }
2584
2585 void
2586 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2587 {
2588 int retcode;
2589 int s;
2590
2591 raidPtr->parity_rewrite_stripes_done = 0;
2592 raidPtr->parity_rewrite_in_progress = 1;
2593 s = splbio();
2594 retcode = rf_RewriteParity(raidPtr);
2595 splx(s);
2596 if (retcode) {
2597 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2598 } else {
2599 /* set the clean bit! If we shutdown correctly,
2600 the clean bit on each component label will get
2601 set */
2602 raidPtr->parity_good = RF_RAID_CLEAN;
2603 }
2604 raidPtr->parity_rewrite_in_progress = 0;
2605
2606 /* Anyone waiting for us to stop? If so, inform them... */
2607 if (raidPtr->waitShutdown) {
2608 wakeup(&raidPtr->parity_rewrite_in_progress);
2609 }
2610
2611 /* That's all... */
2612 kthread_exit(0); /* does not return */
2613 }
2614
2615
2616 void
2617 rf_CopybackThread(RF_Raid_t *raidPtr)
2618 {
2619 int s;
2620
2621 raidPtr->copyback_in_progress = 1;
2622 s = splbio();
2623 rf_CopybackReconstructedData(raidPtr);
2624 splx(s);
2625 raidPtr->copyback_in_progress = 0;
2626
2627 /* That's all... */
2628 kthread_exit(0); /* does not return */
2629 }
2630
2631
2632 void
2633 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2634 {
2635 int s;
2636 RF_Raid_t *raidPtr;
2637
2638 s = splbio();
2639 raidPtr = req->raidPtr;
2640 raidPtr->recon_in_progress = 1;
2641 rf_ReconstructInPlace(raidPtr, req->col);
2642 RF_Free(req, sizeof(*req));
2643 raidPtr->recon_in_progress = 0;
2644 splx(s);
2645
2646 /* That's all... */
2647 kthread_exit(0); /* does not return */
2648 }
2649
2650 static RF_AutoConfig_t *
2651 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
2652 const char *cname, RF_SectorCount_t size)
2653 {
2654 int good_one = 0;
2655 RF_ComponentLabel_t *clabel;
2656 RF_AutoConfig_t *ac;
2657
2658 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2659 if (clabel == NULL) {
2660 oomem:
2661 while(ac_list) {
2662 ac = ac_list;
2663 if (ac->clabel)
2664 free(ac->clabel, M_RAIDFRAME);
2665 ac_list = ac_list->next;
2666 free(ac, M_RAIDFRAME);
2667 }
2668 printf("RAID auto config: out of memory!\n");
2669 return NULL; /* XXX probably should panic? */
2670 }
2671
2672 if (!raidread_component_label(dev, vp, clabel)) {
2673 /* Got the label. Does it look reasonable? */
2674 if (rf_reasonable_label(clabel) &&
2675 (clabel->partitionSize <= size)) {
2676 #ifdef DEBUG
2677 printf("Component on: %s: %llu\n",
2678 cname, (unsigned long long)size);
2679 rf_print_component_label(clabel);
2680 #endif
2681 /* if it's reasonable, add it, else ignore it. */
2682 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
2683 M_NOWAIT);
2684 if (ac == NULL) {
2685 free(clabel, M_RAIDFRAME);
2686 goto oomem;
2687 }
2688 strlcpy(ac->devname, cname, sizeof(ac->devname));
2689 ac->dev = dev;
2690 ac->vp = vp;
2691 ac->clabel = clabel;
2692 ac->next = ac_list;
2693 ac_list = ac;
2694 good_one = 1;
2695 }
2696 }
2697 if (!good_one) {
2698 /* cleanup */
2699 free(clabel, M_RAIDFRAME);
2700 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2701 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2702 vput(vp);
2703 }
2704 return ac_list;
2705 }
2706
/*
 * Scan every device on the system for RAID components.
 *
 * Only devices of class DV_DISK are considered, and floppy/CD drivers
 * are skipped by name.  For a wedge ("dk") the wedge itself is the
 * candidate, provided its partition type is DKW_PTYPE_RAIDFRAME.  For
 * any other disk the disklabel is fetched through the raw partition and
 * every partition of type FS_RAID becomes a candidate.  Candidates are
 * handed to rf_get_component(), which decides whether they join the list.
 *
 * Returns the list built up by rf_get_component() (NULL if nothing was
 * found).  Panics if a vnode cannot be allocated for a device.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int bmajor, bminor, wedge;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

		/* get a vnode for the raw partition of this disk */

		/* a wedge is addressed directly; other disks through
		   their raw partition */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		if (wedge) {
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED, 0);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", dv->dv_xname, error);
out:
				/* shared exit for wedges we are not going
				   to use: close the vnode and move on to
				   the next device */
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vput(vp);
				continue;
			}

			/* only wedges typed as RAIDframe are of interest */
			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
				goto out;

			/* the vnode is handed over to rf_get_component() */
			ac_list = rf_get_component(ac_list, dev, vp,
			    dv->dv_xname, dkw.dkw_size);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", dv->dv_xname, error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vput(vp);

		/* without a label there are no partitions to look at */
		if (error)
			continue;

		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			/* component name: device name plus partition letter */
			snprintf(cname, sizeof(cname), "%s%c",
			    dv->dv_xname, 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size);
		}
	}
	return ac_list;
}
2841
2842
2843 static int
2844 rf_reasonable_label(RF_ComponentLabel_t *clabel)
2845 {
2846
2847 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2848 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2849 ((clabel->clean == RF_RAID_CLEAN) ||
2850 (clabel->clean == RF_RAID_DIRTY)) &&
2851 clabel->row >=0 &&
2852 clabel->column >= 0 &&
2853 clabel->num_rows > 0 &&
2854 clabel->num_columns > 0 &&
2855 clabel->row < clabel->num_rows &&
2856 clabel->column < clabel->num_columns &&
2857 clabel->blockSize > 0 &&
2858 clabel->numBlocks > 0) {
2859 /* label looks reasonable enough... */
2860 return(1);
2861 }
2862 return(0);
2863 }
2864
2865
#ifdef DEBUG
/*
 * Print the fields of a component label (only built with DEBUG).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_auto = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	/* position and identity */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status );

	/* layout */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* autoconfiguration */
	printf(" Autoconfig: %s\n", is_auto );
	printf(" Contains root partition: %s\n", is_root );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
2893
2894 RF_ConfigSet_t *
2895 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2896 {
2897 RF_AutoConfig_t *ac;
2898 RF_ConfigSet_t *config_sets;
2899 RF_ConfigSet_t *cset;
2900 RF_AutoConfig_t *ac_next;
2901
2902
2903 config_sets = NULL;
2904
2905 /* Go through the AutoConfig list, and figure out which components
2906 belong to what sets. */
2907 ac = ac_list;
2908 while(ac!=NULL) {
2909 /* we're going to putz with ac->next, so save it here
2910 for use at the end of the loop */
2911 ac_next = ac->next;
2912
2913 if (config_sets == NULL) {
2914 /* will need at least this one... */
2915 config_sets = (RF_ConfigSet_t *)
2916 malloc(sizeof(RF_ConfigSet_t),
2917 M_RAIDFRAME, M_NOWAIT);
2918 if (config_sets == NULL) {
2919 panic("rf_create_auto_sets: No memory!");
2920 }
2921 /* this one is easy :) */
2922 config_sets->ac = ac;
2923 config_sets->next = NULL;
2924 config_sets->rootable = 0;
2925 ac->next = NULL;
2926 } else {
2927 /* which set does this component fit into? */
2928 cset = config_sets;
2929 while(cset!=NULL) {
2930 if (rf_does_it_fit(cset, ac)) {
2931 /* looks like it matches... */
2932 ac->next = cset->ac;
2933 cset->ac = ac;
2934 break;
2935 }
2936 cset = cset->next;
2937 }
2938 if (cset==NULL) {
2939 /* didn't find a match above... new set..*/
2940 cset = (RF_ConfigSet_t *)
2941 malloc(sizeof(RF_ConfigSet_t),
2942 M_RAIDFRAME, M_NOWAIT);
2943 if (cset == NULL) {
2944 panic("rf_create_auto_sets: No memory!");
2945 }
2946 cset->ac = ac;
2947 ac->next = NULL;
2948 cset->next = config_sets;
2949 cset->rootable = 0;
2950 config_sets = cset;
2951 }
2952 }
2953 ac = ac_next;
2954 }
2955
2956
2957 return(config_sets);
2958 }
2959
2960 static int
2961 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2962 {
2963 RF_ComponentLabel_t *clabel1, *clabel2;
2964
2965 /* If this one matches the *first* one in the set, that's good
2966 enough, since the other members of the set would have been
2967 through here too... */
2968 /* note that we are not checking partitionSize here..
2969
2970 Note that we are also not checking the mod_counters here.
2971 If everything else matches execpt the mod_counter, that's
2972 good enough for this test. We will deal with the mod_counters
2973 a little later in the autoconfiguration process.
2974
2975 (clabel1->mod_counter == clabel2->mod_counter) &&
2976
2977 The reason we don't check for this is that failed disks
2978 will have lower modification counts. If those disks are
2979 not added to the set they used to belong to, then they will
2980 form their own set, which may result in 2 different sets,
2981 for example, competing to be configured at raid0, and
2982 perhaps competing to be the root filesystem set. If the
2983 wrong ones get configured, or both attempt to become /,
2984 weird behaviour and or serious lossage will occur. Thus we
2985 need to bring them into the fold here, and kick them out at
2986 a later point.
2987
2988 */
2989
2990 clabel1 = cset->ac->clabel;
2991 clabel2 = ac->clabel;
2992 if ((clabel1->version == clabel2->version) &&
2993 (clabel1->serial_number == clabel2->serial_number) &&
2994 (clabel1->num_rows == clabel2->num_rows) &&
2995 (clabel1->num_columns == clabel2->num_columns) &&
2996 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2997 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2998 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2999 (clabel1->parityConfig == clabel2->parityConfig) &&
3000 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3001 (clabel1->blockSize == clabel2->blockSize) &&
3002 (clabel1->numBlocks == clabel2->numBlocks) &&
3003 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3004 (clabel1->root_partition == clabel2->root_partition) &&
3005 (clabel1->last_unit == clabel2->last_unit) &&
3006 (clabel1->config_order == clabel2->config_order)) {
3007 /* if it get's here, it almost *has* to be a match */
3008 } else {
3009 /* it's not consistent with somebody in the set..
3010 punt */
3011 return(0);
3012 }
3013 /* all was fine.. it must fit... */
3014 return(1);
3015 }
3016
3017 int
3018 rf_have_enough_components(RF_ConfigSet_t *cset)
3019 {
3020 RF_AutoConfig_t *ac;
3021 RF_AutoConfig_t *auto_config;
3022 RF_ComponentLabel_t *clabel;
3023 int c;
3024 int num_cols;
3025 int num_missing;
3026 int mod_counter;
3027 int mod_counter_found;
3028 int even_pair_failed;
3029 char parity_type;
3030
3031
3032 /* check to see that we have enough 'live' components
3033 of this set. If so, we can configure it if necessary */
3034
3035 num_cols = cset->ac->clabel->num_columns;
3036 parity_type = cset->ac->clabel->parityConfig;
3037
3038 /* XXX Check for duplicate components!?!?!? */
3039
3040 /* Determine what the mod_counter is supposed to be for this set. */
3041
3042 mod_counter_found = 0;
3043 mod_counter = 0;
3044 ac = cset->ac;
3045 while(ac!=NULL) {
3046 if (mod_counter_found==0) {
3047 mod_counter = ac->clabel->mod_counter;
3048 mod_counter_found = 1;
3049 } else {
3050 if (ac->clabel->mod_counter > mod_counter) {
3051 mod_counter = ac->clabel->mod_counter;
3052 }
3053 }
3054 ac = ac->next;
3055 }
3056
3057 num_missing = 0;
3058 auto_config = cset->ac;
3059
3060 even_pair_failed = 0;
3061 for(c=0; c<num_cols; c++) {
3062 ac = auto_config;
3063 while(ac!=NULL) {
3064 if ((ac->clabel->column == c) &&
3065 (ac->clabel->mod_counter == mod_counter)) {
3066 /* it's this one... */
3067 #ifdef DEBUG
3068 printf("Found: %s at %d\n",
3069 ac->devname,c);
3070 #endif
3071 break;
3072 }
3073 ac=ac->next;
3074 }
3075 if (ac==NULL) {
3076 /* Didn't find one here! */
3077 /* special case for RAID 1, especially
3078 where there are more than 2
3079 components (where RAIDframe treats
3080 things a little differently :( ) */
3081 if (parity_type == '1') {
3082 if (c%2 == 0) { /* even component */
3083 even_pair_failed = 1;
3084 } else { /* odd component. If
3085 we're failed, and
3086 so is the even
3087 component, it's
3088 "Good Night, Charlie" */
3089 if (even_pair_failed == 1) {
3090 return(0);
3091 }
3092 }
3093 } else {
3094 /* normal accounting */
3095 num_missing++;
3096 }
3097 }
3098 if ((parity_type == '1') && (c%2 == 1)) {
3099 /* Just did an even component, and we didn't
3100 bail.. reset the even_pair_failed flag,
3101 and go on to the next component.... */
3102 even_pair_failed = 0;
3103 }
3104 }
3105
3106 clabel = cset->ac->clabel;
3107
3108 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3109 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3110 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3111 /* XXX this needs to be made *much* more general */
3112 /* Too many failures */
3113 return(0);
3114 }
3115 /* otherwise, all is well, and we've got enough to take a kick
3116 at autoconfiguring this set */
3117 return(1);
3118 }
3119
3120 void
3121 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3122 RF_Raid_t *raidPtr)
3123 {
3124 RF_ComponentLabel_t *clabel;
3125 int i;
3126
3127 clabel = ac->clabel;
3128
3129 /* 1. Fill in the common stuff */
3130 config->numRow = clabel->num_rows = 1;
3131 config->numCol = clabel->num_columns;
3132 config->numSpare = 0; /* XXX should this be set here? */
3133 config->sectPerSU = clabel->sectPerSU;
3134 config->SUsPerPU = clabel->SUsPerPU;
3135 config->SUsPerRU = clabel->SUsPerRU;
3136 config->parityConfig = clabel->parityConfig;
3137 /* XXX... */
3138 strcpy(config->diskQueueType,"fifo");
3139 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3140 config->layoutSpecificSize = 0; /* XXX ?? */
3141
3142 while(ac!=NULL) {
3143 /* row/col values will be in range due to the checks
3144 in reasonable_label() */
3145 strcpy(config->devnames[0][ac->clabel->column],
3146 ac->devname);
3147 ac = ac->next;
3148 }
3149
3150 for(i=0;i<RF_MAXDBGV;i++) {
3151 config->debugVars[i][0] = 0;
3152 }
3153 }
3154
3155 int
3156 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3157 {
3158 RF_ComponentLabel_t clabel;
3159 struct vnode *vp;
3160 dev_t dev;
3161 int column;
3162 int sparecol;
3163
3164 raidPtr->autoconfigure = new_value;
3165
3166 for(column=0; column<raidPtr->numCol; column++) {
3167 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3168 dev = raidPtr->Disks[column].dev;
3169 vp = raidPtr->raid_cinfo[column].ci_vp;
3170 raidread_component_label(dev, vp, &clabel);
3171 clabel.autoconfigure = new_value;
3172 raidwrite_component_label(dev, vp, &clabel);
3173 }
3174 }
3175 for(column = 0; column < raidPtr->numSpare ; column++) {
3176 sparecol = raidPtr->numCol + column;
3177 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3178 dev = raidPtr->Disks[sparecol].dev;
3179 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3180 raidread_component_label(dev, vp, &clabel);
3181 clabel.autoconfigure = new_value;
3182 raidwrite_component_label(dev, vp, &clabel);
3183 }
3184 }
3185 return(new_value);
3186 }
3187
3188 int
3189 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3190 {
3191 RF_ComponentLabel_t clabel;
3192 struct vnode *vp;
3193 dev_t dev;
3194 int column;
3195 int sparecol;
3196
3197 raidPtr->root_partition = new_value;
3198 for(column=0; column<raidPtr->numCol; column++) {
3199 if (raidPtr->Disks[column].status == rf_ds_optimal) {
3200 dev = raidPtr->Disks[column].dev;
3201 vp = raidPtr->raid_cinfo[column].ci_vp;
3202 raidread_component_label(dev, vp, &clabel);
3203 clabel.root_partition = new_value;
3204 raidwrite_component_label(dev, vp, &clabel);
3205 }
3206 }
3207 for(column = 0; column < raidPtr->numSpare ; column++) {
3208 sparecol = raidPtr->numCol + column;
3209 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3210 dev = raidPtr->Disks[sparecol].dev;
3211 vp = raidPtr->raid_cinfo[sparecol].ci_vp;
3212 raidread_component_label(dev, vp, &clabel);
3213 clabel.root_partition = new_value;
3214 raidwrite_component_label(dev, vp, &clabel);
3215 }
3216 }
3217 return(new_value);
3218 }
3219
3220 void
3221 rf_release_all_vps(RF_ConfigSet_t *cset)
3222 {
3223 RF_AutoConfig_t *ac;
3224
3225 ac = cset->ac;
3226 while(ac!=NULL) {
3227 /* Close the vp, and give it back */
3228 if (ac->vp) {
3229 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3230 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3231 vput(ac->vp);
3232 ac->vp = NULL;
3233 }
3234 ac = ac->next;
3235 }
3236 }
3237
3238
3239 void
3240 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3241 {
3242 RF_AutoConfig_t *ac;
3243 RF_AutoConfig_t *next_ac;
3244
3245 ac = cset->ac;
3246 while(ac!=NULL) {
3247 next_ac = ac->next;
3248 /* nuke the label */
3249 free(ac->clabel, M_RAIDFRAME);
3250 /* cleanup the config structure */
3251 free(ac, M_RAIDFRAME);
3252 /* "next.." */
3253 ac = next_ac;
3254 }
3255 /* and, finally, nuke the config set */
3256 free(cset, M_RAIDFRAME);
3257 }
3258
3259
3260 void
3261 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3262 {
3263 /* current version number */
3264 clabel->version = RF_COMPONENT_LABEL_VERSION;
3265 clabel->serial_number = raidPtr->serial_number;
3266 clabel->mod_counter = raidPtr->mod_counter;
3267 clabel->num_rows = 1;
3268 clabel->num_columns = raidPtr->numCol;
3269 clabel->clean = RF_RAID_DIRTY; /* not clean */
3270 clabel->status = rf_ds_optimal; /* "It's good!" */
3271
3272 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3273 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3274 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3275
3276 clabel->blockSize = raidPtr->bytesPerSector;
3277 clabel->numBlocks = raidPtr->sectorsPerDisk;
3278
3279 /* XXX not portable */
3280 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3281 clabel->maxOutstanding = raidPtr->maxOutstanding;
3282 clabel->autoconfigure = raidPtr->autoconfigure;
3283 clabel->root_partition = raidPtr->root_partition;
3284 clabel->last_unit = raidPtr->raidid;
3285 clabel->config_order = raidPtr->config_order;
3286 }
3287
3288 int
3289 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3290 {
3291 RF_Raid_t *raidPtr;
3292 RF_Config_t *config;
3293 int raidID;
3294 int retcode;
3295
3296 #ifdef DEBUG
3297 printf("RAID autoconfigure\n");
3298 #endif
3299
3300 retcode = 0;
3301 *unit = -1;
3302
3303 /* 1. Create a config structure */
3304
3305 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3306 M_RAIDFRAME,
3307 M_NOWAIT);
3308 if (config==NULL) {
3309 printf("Out of mem!?!?\n");
3310 /* XXX do something more intelligent here. */
3311 return(1);
3312 }
3313
3314 memset(config, 0, sizeof(RF_Config_t));
3315
3316 /*
3317 2. Figure out what RAID ID this one is supposed to live at
3318 See if we can get the same RAID dev that it was configured
3319 on last time..
3320 */
3321
3322 raidID = cset->ac->clabel->last_unit;
3323 if ((raidID < 0) || (raidID >= numraid)) {
3324 /* let's not wander off into lala land. */
3325 raidID = numraid - 1;
3326 }
3327 if (raidPtrs[raidID]->valid != 0) {
3328
3329 /*
3330 Nope... Go looking for an alternative...
3331 Start high so we don't immediately use raid0 if that's
3332 not taken.
3333 */
3334
3335 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3336 if (raidPtrs[raidID]->valid == 0) {
3337 /* can use this one! */
3338 break;
3339 }
3340 }
3341 }
3342
3343 if (raidID < 0) {
3344 /* punt... */
3345 printf("Unable to auto configure this set!\n");
3346 printf("(Out of RAID devs!)\n");
3347 free(config, M_RAIDFRAME);
3348 return(1);
3349 }
3350
3351 #ifdef DEBUG
3352 printf("Configuring raid%d:\n",raidID);
3353 #endif
3354
3355 raidPtr = raidPtrs[raidID];
3356
3357 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3358 raidPtr->raidid = raidID;
3359 raidPtr->openings = RAIDOUTSTANDING;
3360
3361 /* 3. Build the configuration structure */
3362 rf_create_configuration(cset->ac, config, raidPtr);
3363
3364 /* 4. Do the configuration */
3365 retcode = rf_Configure(raidPtr, config, cset->ac);
3366
3367 if (retcode == 0) {
3368
3369 raidinit(raidPtrs[raidID]);
3370
3371 rf_markalldirty(raidPtrs[raidID]);
3372 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3373 if (cset->ac->clabel->root_partition==1) {
3374 /* everything configured just fine. Make a note
3375 that this set is eligible to be root. */
3376 cset->rootable = 1;
3377 /* XXX do this here? */
3378 raidPtrs[raidID]->root_partition = 1;
3379 }
3380 }
3381
3382 /* 5. Cleanup */
3383 free(config, M_RAIDFRAME);
3384
3385 *unit = raidID;
3386 return(retcode);
3387 }
3388
3389 void
3390 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3391 {
3392 struct buf *bp;
3393
3394 bp = (struct buf *)desc->bp;
3395 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3396 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3397 }
3398
3399 void
3400 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3401 size_t xmin, size_t xmax)
3402 {
3403 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3404 pool_sethiwat(p, xmax);
3405 pool_prime(p, xmin);
3406 pool_setlowat(p, xmin);
3407 }
3408
3409 /*
3410 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3411 * if there is IO pending and if that IO could possibly be done for a
3412 * given RAID set. Returns 0 if IO is waiting and can be done, 1
3413 * otherwise.
3414 *
3415 */
3416
3417 int
3418 rf_buf_queue_check(int raidid)
3419 {
3420 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
3421 raidPtrs[raidid]->openings > 0) {
3422 /* there is work to do */
3423 return 0;
3424 }
3425 /* default is nothing to do */
3426 return 1;
3427 }
3428
3429 int
3430 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3431 {
3432 struct partinfo dpart;
3433 struct dkwedge_info dkw;
3434 int error;
3435
3436 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
3437 if (error == 0) {
3438 diskPtr->blockSize = dpart.disklab->d_secsize;
3439 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
3440 diskPtr->partitionSize = dpart.part->p_size;
3441 return 0;
3442 }
3443
3444 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
3445 if (error == 0) {
3446 diskPtr->blockSize = 512; /* XXX */
3447 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
3448 diskPtr->partitionSize = dkw.dkw_size;
3449 return 0;
3450 }
3451 return error;
3452 }
3453
/*
 * raid_match -- match routine for the raid device.
 *
 * Ignores all of its arguments and always returns 1.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{
	/* unconditional match */
	return 1;
}
3460
/*
 * raid_attach -- attach routine for the raid device.
 *
 * Intentionally empty: none of the arguments are used and no work is
 * done here.
 */
static void
raid_attach(struct device *parent, struct device *self, void *aux)
{
	/* nothing to do */
}
3467
3468
3469 static int
3470 raid_detach(struct device *self, int flags)
3471 {
3472 struct raid_softc *rs = (struct raid_softc *)self;
3473
3474 if (rs->sc_flags & RAIDF_INITED)
3475 return EBUSY;
3476
3477 return 0;
3478 }
3479
3480
3481